Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(179)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1182603004: Subzero: Transform suitable Load/Arith/Store sequences into RMW ops. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review changes Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/ias-multi-reloc.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after
458 Func->deletePhis(); 458 Func->deletePhis();
459 if (Func->hasError()) 459 if (Func->hasError())
460 return; 460 return;
461 Func->dump("After Phi lowering"); 461 Func->dump("After Phi lowering");
462 } 462 }
463 463
464 // Address mode optimization. 464 // Address mode optimization.
465 Func->getVMetadata()->init(VMK_SingleDefs); 465 Func->getVMetadata()->init(VMK_SingleDefs);
466 Func->doAddressOpt(); 466 Func->doAddressOpt();
467 467
468 // Find read-modify-write opportunities. Do this after address mode
469 // optimization so that doAddressOpt() doesn't need to be applied to RMW
470 // instructions as well.
471 findRMW();
472 Func->dump("After RMW transform");
473
468 // Argument lowering 474 // Argument lowering
469 Func->doArgLowering(); 475 Func->doArgLowering();
470 476
471 // Target lowering. This requires liveness analysis for some parts 477 // Target lowering. This requires liveness analysis for some parts
472 // of the lowering decisions, such as compare/branch fusing. If 478 // of the lowering decisions, such as compare/branch fusing. If
473 // non-lightweight liveness analysis is used, the instructions need 479 // non-lightweight liveness analysis is used, the instructions need
474 // to be renumbered first. TODO: This renumbering should only be 480 // to be renumbered first. TODO: This renumbering should only be
475 // necessary if we're actually calculating live intervals, which we 481 // necessary if we're actually calculating live intervals, which we
476 // only do for register allocation. 482 // only do for register allocation.
477 Func->renumberInstructions(); 483 Func->renumberInstructions();
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
572 Func->dump("After stack frame mapping"); 578 Func->dump("After stack frame mapping");
573 579
574 // Nop insertion 580 // Nop insertion
575 if (Ctx->getFlags().shouldDoNopInsertion()) { 581 if (Ctx->getFlags().shouldDoNopInsertion()) {
576 Func->doNopInsertion(); 582 Func->doNopInsertion();
577 } 583 }
578 } 584 }
579 585
580 namespace { 586 namespace {
581 587
588 bool canRMW(const InstArithmetic *Arith) {
589 Type Ty = Arith->getDest()->getType();
590 bool isI64 = Ty == IceType_i64;
591 bool isVector = isVectorType(Ty);
592
593 switch (Arith->getOp()) {
594 // Not handled for lack of simple lowering:
595 // shift on i64 and vectors
596 // mul, udiv, urem, sdiv, srem, frem
597 default:
598 return false;
599 case InstArithmetic::Add:
600 return !isI64 && !isVector; // TODO(stichnot): implement i64 and vector
601 case InstArithmetic::Sub:
602 case InstArithmetic::And:
603 case InstArithmetic::Or:
604 case InstArithmetic::Xor:
605 case InstArithmetic::Fadd:
606 case InstArithmetic::Fsub:
607 case InstArithmetic::Fmul:
608 case InstArithmetic::Fdiv:
609 return false; // TODO(stichnot): implement
610 return true;
611 case InstArithmetic::Shl:
612 case InstArithmetic::Lshr:
613 case InstArithmetic::Ashr:
614 return false; // TODO(stichnot): implement
615 return !isI64 && !isVector;
616 }
617 }
618
619 bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
620 if (A == B)
621 return true;
622 if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) {
623 if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) {
624 return MemA->getBase() == MemB->getBase() &&
625 MemA->getOffset() == MemB->getOffset() &&
626 MemA->getIndex() == MemB->getIndex() &&
627 MemA->getShift() == MemB->getShift() &&
628 MemA->getSegmentRegister() == MemB->getSegmentRegister();
629 }
630 }
631 return false;
632 }
633
634 } // end of anonymous namespace
635
636 void TargetX8632::findRMW() {
637 Func->dump("Before RMW");
638 OstreamLocker L(Func->getContext());
639 Ostream &Str = Func->getContext()->getStrDump();
640 for (CfgNode *Node : Func->getNodes()) {
641 // Walk through the instructions, considering each sequence of 3
642 // instructions, and look for the particular RMW pattern. Note that this
643 // search can be "broken" (false negatives) if there are intervening deleted
644 // instructions, or intervening instructions that could be safely moved out
645 // of the way to reveal an RMW pattern.
646 auto E = Node->getInsts().end();
647 auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
648 for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
649 // Make I3 skip over deleted instructions.
650 while (I3 != E && I3->isDeleted())
651 ++I3;
652 if (I1 == E || I2 == E || I3 == E)
653 continue;
654 assert(!I1->isDeleted());
655 assert(!I2->isDeleted());
656 assert(!I3->isDeleted());
657 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
658 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
659 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
660 // Look for:
661 // a = Load addr
662 // b = <op> a, other
663 // Store b, addr
664 // Change to:
665 // a = Load addr
666 // b = <op> a, other
667 // x = FakeDef
668 // RMW <op>, addr, other, x
669 // b = Store b, addr, x
670 // Note that inferTwoAddress() makes sure setDestNonKillable() gets
671 // called on the updated Store instruction, to avoid liveness
672 // problems later.
673 //
674 // With this transformation, the Store instruction acquires a Dest
675 // variable and is now subject to dead code elimination if there are
676 // no more uses of "b". Variable "x" is a beacon for determining
677 // whether the Store instruction gets dead-code eliminated. If the
678 // Store instruction is eliminated, then it must be the case that
679 // the RMW instruction ends x's live range, and therefore the RMW
680 // instruction will be retained and later lowered. On the other
681 // hand, if the RMW instruction does not end x's live range, then
682 // the Store instruction must still be present, and therefore the
683 // RMW instruction is ignored during lowering because it is
684 // redundant with the Store instruction.
685 //
686 // Note that if "a" has further uses, the RMW transformation may
687 // still trigger, resulting in two loads and one store, which is
688 // worse than the original one load and one store. However, this is
689 // probably rare, and caching probably keeps it just as fast.
690 if (!isSameMemAddressOperand(Load->getSourceAddress(),
691 Store->getAddr()))
692 continue;
693 if (false && Load->getSourceAddress() != Store->getAddr())
694 continue;
695 if (Arith->getSrc(0) != Load->getDest())
696 continue;
697 if (Arith->getDest() != Store->getData())
698 continue;
699 if (!canRMW(Arith))
700 continue;
701 if (Func->isVerbose(IceV_RMW)) {
702 Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
703 Load->dump(Func);
704 Str << "\n ";
705 Arith->dump(Func);
706 Str << "\n ";
707 Store->dump(Func);
708 Str << "\n";
709 }
710 Variable *Beacon = Func->makeVariable(IceType_i32);
711 Beacon->setWeight(0);
712 Store->setRmwBeacon(Beacon);
713 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
714 Node->getInsts().insert(I3, BeaconDef);
715 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
716 Func, Arith->getSrc(1), Store->getAddr(), Beacon,
717 Arith->getOp());
718 Node->getInsts().insert(I3, RMW);
719 }
720 }
721 }
722 }
723 }
724 }
725
726 namespace {
727
582 // Converts a ConstantInteger32 operand into its constant value, or 728 // Converts a ConstantInteger32 operand into its constant value, or
583 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 729 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
584 uint64_t getConstantMemoryOrder(Operand *Opnd) { 730 uint64_t getConstantMemoryOrder(Operand *Opnd) {
585 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 731 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
586 return Integer->getValue(); 732 return Integer->getValue();
587 return Intrinsics::MemoryOrderInvalid; 733 return Intrinsics::MemoryOrderInvalid;
588 } 734 }
589 735
590 // Determines whether the dest of a Load instruction can be folded 736 // Determines whether the dest of a Load instruction can be folded
591 // into one of the src operands of a 2-operand instruction. This is 737 // into one of the src operands of a 2-operand instruction. This is
(...skipping 3795 matching lines...) Expand 10 before | Expand all | Expand 10 after
4387 // not other OperandX8632Mem, so there should be no mention of segment 4533 // not other OperandX8632Mem, so there should be no mention of segment
4388 // registers there either. 4534 // registers there either.
4389 const OperandX8632Mem::SegmentRegisters SegmentReg = 4535 const OperandX8632Mem::SegmentRegisters SegmentReg =
4390 OperandX8632Mem::DefaultSegment; 4536 OperandX8632Mem::DefaultSegment;
4391 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4537 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4392 if (Base && Addr != Base) { 4538 if (Base && Addr != Base) {
4393 Inst->setDeleted(); 4539 Inst->setDeleted();
4394 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4540 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4395 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4541 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
4396 Shift, SegmentReg); 4542 Shift, SegmentReg);
4397 Context.insert(InstStore::create(Func, Data, Addr)); 4543 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4544 if (Inst->getDest())
4545 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4546 Context.insert(NewStore);
4398 } 4547 }
4399 } 4548 }
4400 4549
4401 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4550 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4402 // This implements the most naive possible lowering. 4551 // This implements the most naive possible lowering.
4403 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4552 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4404 Operand *Src0 = Inst->getComparison(); 4553 Operand *Src0 = Inst->getComparison();
4405 SizeT NumCases = Inst->getNumCases(); 4554 SizeT NumCases = Inst->getNumCases();
4406 if (Src0->getType() == IceType_i64) { 4555 if (Src0->getType() == IceType_i64) {
4407 Src0 = legalize(Src0); // get Base/Index into physical registers 4556 Src0 = legalize(Src0); // get Base/Index into physical registers
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
4490 NextCast->setDeleted(); 4639 NextCast->setDeleted();
4491 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); 4640 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4492 // Skip over the instruction. 4641 // Skip over the instruction.
4493 Context.advanceNext(); 4642 Context.advanceNext();
4494 } 4643 }
4495 } 4644 }
4496 } 4645 }
4497 4646
4498 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } 4647 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }
4499 4648
4649 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {
4650 // If the beacon variable's live range does not end in this
4651 // instruction, then it must end in the modified Store instruction
4652 // that follows. This means that the original Store instruction is
4653 // still there, either because the value being stored is used beyond
4654 // the Store instruction, or because dead code elimination did not
4655 // happen. In either case, we cancel RMW lowering (and the caller
4656 // deletes the RMW instruction).
4657 if (!RMW->isLastUse(RMW->getBeacon()))
4658 return;
4659 Operand *Src = RMW->getData();
4660 Type Ty = Src->getType();
4661 OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4662 if (Ty == IceType_i64) {
4663 // TODO(stichnot): Implement.
4664 } else if (isVectorType(Ty)) {
4665 // TODO(stichnot): Implement.
4666 } else {
4667 // i8, i16, i32, f32, f64
4668 switch (RMW->getOp()) {
4669 default:
4670 // TODO(stichnot): Implement other arithmetic operators.
4671 break;
4672 case InstArithmetic::Add:
4673 Src = legalize(Src, Legal_Reg | Legal_Imm);
4674 _add_rmw(Addr, Src);
4675 return;
4676 }
4677 }
4678 llvm::report_fatal_error("Couldn't lower RMW instruction");
4679 }
4680
4681 void TargetX8632::lowerOther(const Inst *Instr) {
4682 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
4683 lowerRMW(RMW);
4684 } else {
4685 TargetLowering::lowerOther(Instr);
4686 }
4687 }
4688
4500 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4689 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4501 // preserve integrity of liveness analysis. Undef values are also 4690 // preserve integrity of liveness analysis. Undef values are also
4502 // turned into zeroes, since loOperand() and hiOperand() don't expect 4691 // turned into zeroes, since loOperand() and hiOperand() don't expect
4503 // Undef input. 4692 // Undef input.
4504 void TargetX8632::prelowerPhis() { 4693 void TargetX8632::prelowerPhis() {
4505 CfgNode *Node = Context.getNode(); 4694 CfgNode *Node = Context.getNode();
4506 for (Inst &I : Node->getPhis()) { 4695 for (Inst &I : Node->getPhis()) {
4507 auto Phi = llvm::dyn_cast<InstPhi>(&I); 4696 auto Phi = llvm::dyn_cast<InstPhi>(&I);
4508 if (Phi->isDeleted()) 4697 if (Phi->isDeleted())
4509 continue; 4698 continue;
(...skipping 604 matching lines...) Expand 10 before | Expand all | Expand 10 after
5114 emitConstantPool<PoolTypeConverter<float>>(Ctx); 5303 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5115 emitConstantPool<PoolTypeConverter<double>>(Ctx); 5304 emitConstantPool<PoolTypeConverter<double>>(Ctx);
5116 } break; 5305 } break;
5117 } 5306 }
5118 } 5307 }
5119 5308
5120 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) 5309 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
5121 : TargetHeaderLowering(Ctx) {} 5310 : TargetHeaderLowering(Ctx) {}
5122 5311
5123 } // end of namespace Ice 5312 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/ias-multi-reloc.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698