OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
458 Func->deletePhis(); | 458 Func->deletePhis(); |
459 if (Func->hasError()) | 459 if (Func->hasError()) |
460 return; | 460 return; |
461 Func->dump("After Phi lowering"); | 461 Func->dump("After Phi lowering"); |
462 } | 462 } |
463 | 463 |
464 // Address mode optimization. | 464 // Address mode optimization. |
465 Func->getVMetadata()->init(VMK_SingleDefs); | 465 Func->getVMetadata()->init(VMK_SingleDefs); |
466 Func->doAddressOpt(); | 466 Func->doAddressOpt(); |
467 | 467 |
| 468 // Find read-modify-write opportunities. Do this after address mode |
| 469 // optimization so that doAddressOpt() doesn't need to be applied to RMW |
| 470 // instructions as well. |
| 471 findRMW(); |
| 472 Func->dump("After RMW transform"); |
| 473 |
468 // Argument lowering | 474 // Argument lowering |
469 Func->doArgLowering(); | 475 Func->doArgLowering(); |
470 | 476 |
471 // Target lowering. This requires liveness analysis for some parts | 477 // Target lowering. This requires liveness analysis for some parts |
472 // of the lowering decisions, such as compare/branch fusing. If | 478 // of the lowering decisions, such as compare/branch fusing. If |
473 // non-lightweight liveness analysis is used, the instructions need | 479 // non-lightweight liveness analysis is used, the instructions need |
474 // to be renumbered first. TODO: This renumbering should only be | 480 // to be renumbered first. TODO: This renumbering should only be |
475 // necessary if we're actually calculating live intervals, which we | 481 // necessary if we're actually calculating live intervals, which we |
476 // only do for register allocation. | 482 // only do for register allocation. |
477 Func->renumberInstructions(); | 483 Func->renumberInstructions(); |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
572 Func->dump("After stack frame mapping"); | 578 Func->dump("After stack frame mapping"); |
573 | 579 |
574 // Nop insertion | 580 // Nop insertion |
575 if (Ctx->getFlags().shouldDoNopInsertion()) { | 581 if (Ctx->getFlags().shouldDoNopInsertion()) { |
576 Func->doNopInsertion(); | 582 Func->doNopInsertion(); |
577 } | 583 } |
578 } | 584 } |
579 | 585 |
580 namespace { | 586 namespace { |
581 | 587 |
| 588 bool canRMW(const InstArithmetic *Arith) { |
| 589 Type Ty = Arith->getDest()->getType(); |
| 590 bool isI64 = Ty == IceType_i64; |
| 591 bool isVector = isVectorType(Ty); |
| 592 |
| 593 switch (Arith->getOp()) { |
| 594 // Not handled for lack of simple lowering: |
| 595 // shift on i64 and vectors |
| 596 // mul, udiv, urem, sdiv, srem, frem |
| 597 default: |
| 598 return false; |
| 599 case InstArithmetic::Add: |
| 600 return !isI64 && !isVector; // TODO(stichnot): implement i64 and vector |
| 601 case InstArithmetic::Sub: |
| 602 case InstArithmetic::And: |
| 603 case InstArithmetic::Or: |
| 604 case InstArithmetic::Xor: |
| 605 case InstArithmetic::Fadd: |
| 606 case InstArithmetic::Fsub: |
| 607 case InstArithmetic::Fmul: |
| 608 case InstArithmetic::Fdiv: |
| 609 return false; // TODO(stichnot): implement |
| 610 return true; |
| 611 case InstArithmetic::Shl: |
| 612 case InstArithmetic::Lshr: |
| 613 case InstArithmetic::Ashr: |
| 614 return false; // TODO(stichnot): implement |
| 615 return !isI64 && !isVector; |
| 616 } |
| 617 } |
| 618 |
| 619 bool isSameMemAddressOperand(const Operand *A, const Operand *B) { |
| 620 if (A == B) |
| 621 return true; |
| 622 if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) { |
| 623 if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) { |
| 624 return MemA->getBase() == MemB->getBase() && |
| 625 MemA->getOffset() == MemB->getOffset() && |
| 626 MemA->getIndex() == MemB->getIndex() && |
| 627 MemA->getShift() == MemB->getShift() && |
| 628 MemA->getSegmentRegister() == MemB->getSegmentRegister(); |
| 629 } |
| 630 } |
| 631 return false; |
| 632 } |
| 633 |
| 634 } // end of anonymous namespace |
| 635 |
| 636 void TargetX8632::findRMW() { |
| 637 OstreamLocker L(Func->getContext()); |
| 638 Ostream &Str = Func->getContext()->getStrDump(); |
| 639 for (CfgNode *Node : Func->getNodes()) { |
| 640 // Walk through the instructions, considering each sequence of 3 |
| 641 // instructions, and look for the particular RMW pattern. Note that this |
| 642 // search can be "broken" (false negatives) if there are intervening deleted |
| 643 // instructions, or intervening instructions that could be safely moved out |
| 644 // of the way to reveal an RMW pattern. |
| 645 auto E = Node->getInsts().end(); |
| 646 auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); |
| 647 for (; I3 != E; I1 = I2, I2 = I3, ++I3) { |
| 648 if (I1 == E || I2 == E) |
| 649 continue; |
| 650 if (I1->isDeleted() || I2->isDeleted() || I3->isDeleted()) |
| 651 continue; |
| 652 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { |
| 653 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { |
| 654 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { |
| 655 // Look for: |
| 656 // a = Load addr |
| 657 // b = <op> a, other |
| 658 // Store b, addr |
| 659 // Change to: |
| 660 // a = Load addr |
| 661 // b = <op> a, other |
| 662 // x = FakeDef |
| 663 // RMW <op>, addr, other, x |
| 664 // b = Store b, addr, x |
| 665 // Note that inferTwoAddress() makes sure setDestNonKillable() gets |
| 666 // called on the updated Store instruction, to avoid liveness |
| 667 // problems later. |
| 668 // |
| 669 // With this transformation, the Store instruction acquires a Dest |
| 670 // variable and is now subject to dead code elimination if there are |
| 671 // no more uses of "b". Variable "x" is a beacon for determining |
| 672 // whether the Store instruction gets dead-code eliminated. If the |
| 673 // Store instruction is eliminated, then it must be the case that |
| 674 // the RMW instruction ends x's live range, and therefore the RMW |
| 675 // instruction will be retained and later lowered. On the other |
| 676 // hand, if the RMW instruction does not end x's live range, then |
| 677 // the Store instruction must still be present, and therefore the |
| 678 // RMW instruction is ignored during lowering because it is |
| 679 // redundant with the Store instruction. |
| 680 // |
| 681 // Note that if "a" has further uses, the RMW transformation may |
| 682 // still trigger, resulting in two loads and one store, which is |
| 683 // worse than the original one load and one store. However, this is |
| 684 // probably rare, and caching probably keeps it just as fast. |
| 685 if (!isSameMemAddressOperand(Load->getSourceAddress(), |
| 686 Store->getAddr())) |
| 687 continue; |
| 688 if (false && Load->getSourceAddress() != Store->getAddr()) |
| 689 continue; |
| 690 if (Arith->getSrc(0) != Load->getDest()) |
| 691 continue; |
| 692 if (Arith->getDest() != Store->getData()) |
| 693 continue; |
| 694 if (!canRMW(Arith)) |
| 695 continue; |
| 696 if (Func->isVerbose(IceV_RMW)) { |
| 697 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; |
| 698 Load->dump(Func); |
| 699 Str << "\n "; |
| 700 Arith->dump(Func); |
| 701 Str << "\n "; |
| 702 Store->dump(Func); |
| 703 Str << "\n"; |
| 704 } |
| 705 Variable *Beacon = Func->makeVariable(IceType_i32); |
| 706 Beacon->setWeight(0); |
| 707 Store->setRmwBeacon(Beacon); |
| 708 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); |
| 709 Node->getInsts().insert(I3, BeaconDef); |
| 710 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( |
| 711 Func, Arith->getSrc(1), Store->getAddr(), Beacon, |
| 712 Arith->getOp()); |
| 713 Node->getInsts().insert(I3, RMW); |
| 714 } |
| 715 } |
| 716 } |
| 717 } |
| 718 } |
| 719 } |
| 720 |
| 721 namespace { |
| 722 |
582 // Converts a ConstantInteger32 operand into its constant value, or | 723 // Converts a ConstantInteger32 operand into its constant value, or |
583 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 724 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
584 uint64_t getConstantMemoryOrder(Operand *Opnd) { | 725 uint64_t getConstantMemoryOrder(Operand *Opnd) { |
585 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 726 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
586 return Integer->getValue(); | 727 return Integer->getValue(); |
587 return Intrinsics::MemoryOrderInvalid; | 728 return Intrinsics::MemoryOrderInvalid; |
588 } | 729 } |
589 | 730 |
590 // Determines whether the dest of a Load instruction can be folded | 731 // Determines whether the dest of a Load instruction can be folded |
591 // into one of the src operands of a 2-operand instruction. This is | 732 // into one of the src operands of a 2-operand instruction. This is |
(...skipping 3796 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4388 // not other OperandX8632Mem, so there should be no mention of segment | 4529 // not other OperandX8632Mem, so there should be no mention of segment |
4389 // registers there either. | 4530 // registers there either. |
4390 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4531 const OperandX8632Mem::SegmentRegisters SegmentReg = |
4391 OperandX8632Mem::DefaultSegment; | 4532 OperandX8632Mem::DefaultSegment; |
4392 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4533 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
4393 if (Base && Addr != Base) { | 4534 if (Base && Addr != Base) { |
4394 Inst->setDeleted(); | 4535 Inst->setDeleted(); |
4395 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | 4536 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
4396 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, | 4537 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
4397 Shift, SegmentReg); | 4538 Shift, SegmentReg); |
4398 Context.insert(InstStore::create(Func, Data, Addr)); | 4539 InstStore *NewStore = InstStore::create(Func, Data, Addr); |
| 4540 if (Inst->getDest()) |
| 4541 NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
| 4542 Context.insert(NewStore); |
4399 } | 4543 } |
4400 } | 4544 } |
4401 | 4545 |
4402 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | 4546 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { |
4403 // This implements the most naive possible lowering. | 4547 // This implements the most naive possible lowering. |
4404 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | 4548 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default |
4405 Operand *Src0 = Inst->getComparison(); | 4549 Operand *Src0 = Inst->getComparison(); |
4406 SizeT NumCases = Inst->getNumCases(); | 4550 SizeT NumCases = Inst->getNumCases(); |
4407 if (Src0->getType() == IceType_i64) { | 4551 if (Src0->getType() == IceType_i64) { |
4408 Src0 = legalize(Src0); // get Base/Index into physical registers | 4552 Src0 = legalize(Src0); // get Base/Index into physical registers |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4491 NextCast->setDeleted(); | 4635 NextCast->setDeleted(); |
4492 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); | 4636 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
4493 // Skip over the instruction. | 4637 // Skip over the instruction. |
4494 Context.advanceNext(); | 4638 Context.advanceNext(); |
4495 } | 4639 } |
4496 } | 4640 } |
4497 } | 4641 } |
4498 | 4642 |
4499 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } | 4643 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } |
4500 | 4644 |
| 4645 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { |
| 4646 // If the beacon variable's live range does not end in this |
| 4647 // instruction, then it must end in the modified Store instruction |
| 4648 // that follows. This means that the original Store instruction is |
| 4649 // still there, either because the value being stored is used beyond |
| 4650 // the Store instruction, or because dead code elimination did not |
| 4651 // happen. In either case, we cancel RMW lowering (and the caller |
| 4652 // deletes the RMW instruction). |
| 4653 if (!RMW->isLastUse(RMW->getBeacon())) |
| 4654 return; |
| 4655 Operand *Src = RMW->getData(); |
| 4656 Type Ty = Src->getType(); |
| 4657 OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty); |
| 4658 if (Ty == IceType_i64) { |
| 4659 // TODO(stichnot): Implement. |
| 4660 } else if (isVectorType(Ty)) { |
| 4661 // TODO(stichnot): Implement. |
| 4662 } else { |
| 4663 // i8, i16, i32, f32, f64 |
| 4664 switch (RMW->getOp()) { |
| 4665 default: |
| 4666 // TODO(stichnot): Implement other arithmetic operators. |
| 4667 break; |
| 4668 case InstArithmetic::Add: |
| 4669 Src = legalize(Src, Legal_Reg | Legal_Imm); |
| 4670 _add_rmw(Addr, Src); |
| 4671 return; |
| 4672 } |
| 4673 } |
| 4674 llvm::report_fatal_error("Couldn't lower RMW instruction"); |
| 4675 } |
| 4676 |
| 4677 void TargetX8632::lowerOther(const Inst *Instr) { |
| 4678 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { |
| 4679 lowerRMW(RMW); |
| 4680 } else { |
| 4681 TargetLowering::lowerOther(Instr); |
| 4682 } |
| 4683 } |
| 4684 |
4501 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4685 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
4502 // preserve integrity of liveness analysis. Undef values are also | 4686 // preserve integrity of liveness analysis. Undef values are also |
4503 // turned into zeroes, since loOperand() and hiOperand() don't expect | 4687 // turned into zeroes, since loOperand() and hiOperand() don't expect |
4504 // Undef input. | 4688 // Undef input. |
4505 void TargetX8632::prelowerPhis() { | 4689 void TargetX8632::prelowerPhis() { |
4506 CfgNode *Node = Context.getNode(); | 4690 CfgNode *Node = Context.getNode(); |
4507 for (Inst &I : Node->getPhis()) { | 4691 for (Inst &I : Node->getPhis()) { |
4508 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 4692 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
4509 if (Phi->isDeleted()) | 4693 if (Phi->isDeleted()) |
4510 continue; | 4694 continue; |
(...skipping 604 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5115 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 5299 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
5116 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 5300 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
5117 } break; | 5301 } break; |
5118 } | 5302 } |
5119 } | 5303 } |
5120 | 5304 |
5121 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) | 5305 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) |
5122 : TargetHeaderLowering(Ctx) {} | 5306 : TargetHeaderLowering(Ctx) {} |
5123 | 5307 |
5124 } // end of namespace Ice | 5308 } // end of namespace Ice |
OLD | NEW |