Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(171)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1182603004: Subzero: Transform suitable Load/Arith/Store sequences into RMW ops. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review changes Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after
458 Func->deletePhis(); 458 Func->deletePhis();
459 if (Func->hasError()) 459 if (Func->hasError())
460 return; 460 return;
461 Func->dump("After Phi lowering"); 461 Func->dump("After Phi lowering");
462 } 462 }
463 463
464 // Address mode optimization. 464 // Address mode optimization.
465 Func->getVMetadata()->init(VMK_SingleDefs); 465 Func->getVMetadata()->init(VMK_SingleDefs);
466 Func->doAddressOpt(); 466 Func->doAddressOpt();
467 467
468 // Find read-modify-write opportunities. Do this after address mode
469 // optimization so that doAddressOpt() doesn't need to be applied to RMW
470 // instructions as well.
471 findRMW();
472 Func->dump("After RMW transform");
473
468 // Argument lowering 474 // Argument lowering
469 Func->doArgLowering(); 475 Func->doArgLowering();
470 476
471 // Target lowering. This requires liveness analysis for some parts 477 // Target lowering. This requires liveness analysis for some parts
472 // of the lowering decisions, such as compare/branch fusing. If 478 // of the lowering decisions, such as compare/branch fusing. If
473 // non-lightweight liveness analysis is used, the instructions need 479 // non-lightweight liveness analysis is used, the instructions need
474 // to be renumbered first. TODO: This renumbering should only be 480 // to be renumbered first. TODO: This renumbering should only be
475 // necessary if we're actually calculating live intervals, which we 481 // necessary if we're actually calculating live intervals, which we
476 // only do for register allocation. 482 // only do for register allocation.
477 Func->renumberInstructions(); 483 Func->renumberInstructions();
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
572 Func->dump("After stack frame mapping"); 578 Func->dump("After stack frame mapping");
573 579
574 // Nop insertion 580 // Nop insertion
575 if (Ctx->getFlags().shouldDoNopInsertion()) { 581 if (Ctx->getFlags().shouldDoNopInsertion()) {
576 Func->doNopInsertion(); 582 Func->doNopInsertion();
577 } 583 }
578 } 584 }
579 585
580 namespace { 586 namespace {
581 587
588 bool canRMW(const InstArithmetic *Arith) {
589 Type Ty = Arith->getDest()->getType();
590 bool isI64 = Ty == IceType_i64;
591 bool isVector = isVectorType(Ty);
592
593 switch (Arith->getOp()) {
594 // Not handled for lack of simple lowering:
595 // shift on i64 and vectors
596 // mul, udiv, urem, sdiv, srem, frem
597 default:
598 return false;
599 case InstArithmetic::Add:
600 return !isI64 && !isVector; // TODO(stichnot): implement i64 and vector
601 case InstArithmetic::Sub:
602 case InstArithmetic::And:
603 case InstArithmetic::Or:
604 case InstArithmetic::Xor:
605 case InstArithmetic::Fadd:
606 case InstArithmetic::Fsub:
607 case InstArithmetic::Fmul:
608 case InstArithmetic::Fdiv:
609 return false; // TODO(stichnot): implement
610 return true;
611 case InstArithmetic::Shl:
612 case InstArithmetic::Lshr:
613 case InstArithmetic::Ashr:
614 return false; // TODO(stichnot): implement
615 return !isI64 && !isVector;
616 }
617 }
618
619 bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
620 if (A == B)
621 return true;
622 if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) {
623 if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) {
624 return MemA->getBase() == MemB->getBase() &&
625 MemA->getOffset() == MemB->getOffset() &&
626 MemA->getIndex() == MemB->getIndex() &&
627 MemA->getShift() == MemB->getShift() &&
628 MemA->getSegmentRegister() == MemB->getSegmentRegister();
629 }
630 }
631 return false;
632 }
633
634 } // end of anonymous namespace
635
636 void TargetX8632::findRMW() {
637 OstreamLocker L(Func->getContext());
638 Ostream &Str = Func->getContext()->getStrDump();
639 for (CfgNode *Node : Func->getNodes()) {
640 // Walk through the instructions, considering each sequence of 3
641 // instructions, and look for the particular RMW pattern. Note that this
642 // search can be "broken" (false negatives) if there are intervening deleted
643 // instructions, or intervening instructions that could be safely moved out
644 // of the way to reveal an RMW pattern.
645 auto E = Node->getInsts().end();
646 auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
647 for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
648 if (I1 == E || I2 == E)
649 continue;
650 if (I1->isDeleted() || I2->isDeleted() || I3->isDeleted())
651 continue;
652 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
653 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
654 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
655 // Look for:
656 // a = Load addr
657 // b = <op> a, other
658 // Store b, addr
659 // Change to:
660 // a = Load addr
661 // b = <op> a, other
662 // x = FakeDef
663 // RMW <op>, addr, other, x
664 // b = Store b, addr, x
665 // Note that inferTwoAddress() makes sure setDestNonKillable() gets
666 // called on the updated Store instruction, to avoid liveness
667 // problems later.
668 //
669 // With this transformation, the Store instruction acquires a Dest
670 // variable and is now subject to dead code elimination if there are
671 // no more uses of "b". Variable "x" is a beacon for determining
672 // whether the Store instruction gets dead-code eliminated. If the
673 // Store instruction is eliminated, then it must be the case that
674 // the RMW instruction ends x's live range, and therefore the RMW
675 // instruction will be retained and later lowered. On the other
676 // hand, if the RMW instruction does not end x's live range, then
677 // the Store instruction must still be present, and therefore the
678 // RMW instruction is ignored during lowering because it is
679 // redundant with the Store instruction.
680 //
681 // Note that if "a" has further uses, the RMW transformation may
682 // still trigger, resulting in two loads and one store, which is
683 // worse than the original one load and one store. However, this is
684 // probably rare, and caching probably keeps it just as fast.
685 if (!isSameMemAddressOperand(Load->getSourceAddress(),
686 Store->getAddr()))
687 continue;
688 if (false && Load->getSourceAddress() != Store->getAddr())
689 continue;
690 if (Arith->getSrc(0) != Load->getDest())
691 continue;
692 if (Arith->getDest() != Store->getData())
693 continue;
694 if (!canRMW(Arith))
695 continue;
696 if (Func->isVerbose(IceV_RMW)) {
697 Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
698 Load->dump(Func);
699 Str << "\n ";
700 Arith->dump(Func);
701 Str << "\n ";
702 Store->dump(Func);
703 Str << "\n";
704 }
705 Variable *Beacon = Func->makeVariable(IceType_i32);
706 Beacon->setWeight(0);
707 Store->setRmwBeacon(Beacon);
708 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
709 Node->getInsts().insert(I3, BeaconDef);
710 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
711 Func, Arith->getSrc(1), Store->getAddr(), Beacon,
712 Arith->getOp());
713 Node->getInsts().insert(I3, RMW);
714 }
715 }
716 }
717 }
718 }
719 }
720
721 namespace {
722
582 // Converts a ConstantInteger32 operand into its constant value, or 723 // Converts a ConstantInteger32 operand into its constant value, or
583 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 724 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
584 uint64_t getConstantMemoryOrder(Operand *Opnd) { 725 uint64_t getConstantMemoryOrder(Operand *Opnd) {
585 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 726 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
586 return Integer->getValue(); 727 return Integer->getValue();
587 return Intrinsics::MemoryOrderInvalid; 728 return Intrinsics::MemoryOrderInvalid;
588 } 729 }
589 730
590 // Determines whether the dest of a Load instruction can be folded 731 // Determines whether the dest of a Load instruction can be folded
591 // into one of the src operands of a 2-operand instruction. This is 732 // into one of the src operands of a 2-operand instruction. This is
(...skipping 3796 matching lines...) Expand 10 before | Expand all | Expand 10 after
4388 // not other OperandX8632Mem, so there should be no mention of segment 4529 // not other OperandX8632Mem, so there should be no mention of segment
4389 // registers there either. 4530 // registers there either.
4390 const OperandX8632Mem::SegmentRegisters SegmentReg = 4531 const OperandX8632Mem::SegmentRegisters SegmentReg =
4391 OperandX8632Mem::DefaultSegment; 4532 OperandX8632Mem::DefaultSegment;
4392 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4533 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4393 if (Base && Addr != Base) { 4534 if (Base && Addr != Base) {
4394 Inst->setDeleted(); 4535 Inst->setDeleted();
4395 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4536 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4396 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4537 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
4397 Shift, SegmentReg); 4538 Shift, SegmentReg);
4398 Context.insert(InstStore::create(Func, Data, Addr)); 4539 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4540 if (Inst->getDest())
4541 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4542 Context.insert(NewStore);
4399 } 4543 }
4400 } 4544 }
4401 4545
4402 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4546 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4403 // This implements the most naive possible lowering. 4547 // This implements the most naive possible lowering.
4404 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4548 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4405 Operand *Src0 = Inst->getComparison(); 4549 Operand *Src0 = Inst->getComparison();
4406 SizeT NumCases = Inst->getNumCases(); 4550 SizeT NumCases = Inst->getNumCases();
4407 if (Src0->getType() == IceType_i64) { 4551 if (Src0->getType() == IceType_i64) {
4408 Src0 = legalize(Src0); // get Base/Index into physical registers 4552 Src0 = legalize(Src0); // get Base/Index into physical registers
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
4491 NextCast->setDeleted(); 4635 NextCast->setDeleted();
4492 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); 4636 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4493 // Skip over the instruction. 4637 // Skip over the instruction.
4494 Context.advanceNext(); 4638 Context.advanceNext();
4495 } 4639 }
4496 } 4640 }
4497 } 4641 }
4498 4642
4499 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } 4643 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }
4500 4644
4645 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {
4646 // If the beacon variable's live range does not end in this
4647 // instruction, then it must end in the modified Store instruction
4648 // that follows. This means that the original Store instruction is
4649 // still there, either because the value being stored is used beyond
4650 // the Store instruction, or because dead code elimination did not
4651 // happen. In either case, we cancel RMW lowering (and the caller
4652 // deletes the RMW instruction).
4653 if (!RMW->isLastUse(RMW->getBeacon()))
4654 return;
4655 Operand *Src = RMW->getData();
4656 Type Ty = Src->getType();
4657 OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4658 if (Ty == IceType_i64) {
4659 // TODO(stichnot): Implement.
4660 } else if (isVectorType(Ty)) {
4661 // TODO(stichnot): Implement.
4662 } else {
4663 // i8, i16, i32, f32, f64
4664 switch (RMW->getOp()) {
4665 default:
4666 // TODO(stichnot): Implement other arithmetic operators.
4667 break;
4668 case InstArithmetic::Add:
4669 Src = legalize(Src, Legal_Reg | Legal_Imm);
4670 _add_rmw(Addr, Src);
4671 return;
4672 }
4673 }
4674 llvm::report_fatal_error("Couldn't lower RMW instruction");
4675 }
4676
4677 void TargetX8632::lowerOther(const Inst *Instr) {
4678 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
4679 lowerRMW(RMW);
4680 } else {
4681 TargetLowering::lowerOther(Instr);
4682 }
4683 }
4684
4501 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4685 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4502 // preserve integrity of liveness analysis. Undef values are also 4686 // preserve integrity of liveness analysis. Undef values are also
4503 // turned into zeroes, since loOperand() and hiOperand() don't expect 4687 // turned into zeroes, since loOperand() and hiOperand() don't expect
4504 // Undef input. 4688 // Undef input.
4505 void TargetX8632::prelowerPhis() { 4689 void TargetX8632::prelowerPhis() {
4506 CfgNode *Node = Context.getNode(); 4690 CfgNode *Node = Context.getNode();
4507 for (Inst &I : Node->getPhis()) { 4691 for (Inst &I : Node->getPhis()) {
4508 auto Phi = llvm::dyn_cast<InstPhi>(&I); 4692 auto Phi = llvm::dyn_cast<InstPhi>(&I);
4509 if (Phi->isDeleted()) 4693 if (Phi->isDeleted())
4510 continue; 4694 continue;
(...skipping 604 matching lines...) Expand 10 before | Expand all | Expand 10 after
5115 emitConstantPool<PoolTypeConverter<float>>(Ctx); 5299 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5116 emitConstantPool<PoolTypeConverter<double>>(Ctx); 5300 emitConstantPool<PoolTypeConverter<double>>(Ctx);
5117 } break; 5301 } break;
5118 } 5302 }
5119 } 5303 }
5120 5304
5121 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) 5305 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
5122 : TargetHeaderLowering(Ctx) {} 5306 : TargetHeaderLowering(Ctx) {}
5123 5307
5124 } // end of namespace Ice 5308 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698