Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(684)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1182603004: Subzero: Transform suitable Load/Arith/Store sequences into RMW ops. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Cleanup Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 443 matching lines...) Expand 10 before | Expand all | Expand 10 after
454 return; 454 return;
455 Func->placePhiStores(); 455 Func->placePhiStores();
456 if (Func->hasError()) 456 if (Func->hasError())
457 return; 457 return;
458 Func->deletePhis(); 458 Func->deletePhis();
459 if (Func->hasError()) 459 if (Func->hasError())
460 return; 460 return;
461 Func->dump("After Phi lowering"); 461 Func->dump("After Phi lowering");
462 } 462 }
463 463
464 findRMW();
465
464 // Address mode optimization. 466 // Address mode optimization.
465 Func->getVMetadata()->init(VMK_SingleDefs); 467 Func->getVMetadata()->init(VMK_SingleDefs);
466 Func->doAddressOpt(); 468 Func->doAddressOpt();
467 469
468 // Argument lowering 470 // Argument lowering
469 Func->doArgLowering(); 471 Func->doArgLowering();
470 472
471 // Target lowering. This requires liveness analysis for some parts 473 // Target lowering. This requires liveness analysis for some parts
472 // of the lowering decisions, such as compare/branch fusing. If 474 // of the lowering decisions, such as compare/branch fusing. If
473 // non-lightweight liveness analysis is used, the instructions need 475 // non-lightweight liveness analysis is used, the instructions need
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
572 Func->dump("After stack frame mapping"); 574 Func->dump("After stack frame mapping");
573 575
574 // Nop insertion 576 // Nop insertion
575 if (Ctx->getFlags().shouldDoNopInsertion()) { 577 if (Ctx->getFlags().shouldDoNopInsertion()) {
576 Func->doNopInsertion(); 578 Func->doNopInsertion();
577 } 579 }
578 } 580 }
579 581
580 namespace { 582 namespace {
581 583
584 bool canRMW(const InstArithmetic *Arith) {
585 Type Ty = Arith->getDest()->getType();
586 bool isI64 = Ty == IceType_i64;
587 bool isVector = isVectorType(Ty);
588
589 switch (Arith->getOp()) {
590 default:
591 return false;
592 case InstArithmetic::Add:
593 return !isI64 && !isVector; // TODO(stichnot): implement
jvoung (off chromium) 2015/06/16 17:59:22 not TODO for Add anymore?
Jim Stichnoth 2015/06/17 00:15:40 I meant implement the i64 and vector variants. Cl[…]
594 case InstArithmetic::Sub:
595 case InstArithmetic::And:
596 case InstArithmetic::Or:
597 case InstArithmetic::Xor:
598 case InstArithmetic::Fadd:
599 case InstArithmetic::Fsub:
600 case InstArithmetic::Fmul:
601 case InstArithmetic::Fdiv:
602 return false; // TODO(stichnot): implement
603 return true;
604 case InstArithmetic::Shl:
605 case InstArithmetic::Lshr:
606 case InstArithmetic::Ashr:
607 return false; // TODO(stichnot): implement
608 return !isI64 && !isVector;
609 }
610 // Not handled for lack of simple lowering:
611 // shift on i64 and vectors
jvoung (off chromium) 2015/06/16 17:59:22 Maybe put this earlier, to explain the "return !is[…]
Jim Stichnoth 2015/06/17 00:15:40 Done.
612 // mul, udiv, urem, sdiv, srem, frem
613 }
614
615 } // end of anonymous namespace
616
617 void TargetX8632::findRMW() {
618 OstreamLocker L(Func->getContext());
619 Ostream &Str = Func->getContext()->getStrDump();
620 for (CfgNode *Node : Func->getNodes()) {
621 // Walk through the instructions, considering each sequence of 3
622 // instructions, and look for the particular RMW pattern. Note that this
623 // search can be "broken" (false negatives) if there are intervening deleted
624 // instructions, or intervening instructions that could be safely moved out
625 // of the way to reveal an RMW pattern.
626 auto E = Node->getInsts().end();
627 auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
628 for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
629 if (I1 == E || I2 == E)
630 continue;
631 if (I1->isDeleted() || I2->isDeleted() || I3->isDeleted())
632 continue;
633 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
634 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
635 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
636 // Look for:
637 // a = Load addr
638 // b = <op> a, other
639 // Store b, addr
640 // Change to:
641 // a = Load addr
642 // b = <op> a, other
643 // x = FakeDef
644 // RMW <op>, addr, other, x
645 // b = Store b, addr, x
jvoung (off chromium) 2015/06/16 17:59:21 Is this redefinition of b from " b = Store b, addr[…]
Jim Stichnoth 2015/06/17 00:15:40 Yes, clarified in the comment.
646 //
647 // With this transformation, the Store instruction acquires a Dest
648 // variable and is now subject to dead code elimination if there are
649 // no more uses of "b". Variable "x" is a beacon for determining
650 // whether the Store instruction gets dead-code eliminated. If the
651 // Store instruction is eliminated, then it must be the case that
652 // the RMW instruction ends x's live range, and therefore the RMW
653 // instruction will be retained and later lowered. On the other
654 // hand, if the RMW instruction does not end x's live range, then
655 // the Store instruction must still be present, and therefore the
656 // RMW instruction is ignored during lowering because it is
657 // redundant with the Store instruction.
658 //
659 // Note that if "a" has further uses, the RMW transformation may
660 // still trigger, resulting in two loads and one store, which is
661 // worse than the original one load and one store. However, this is
662 // probably rare, and caching probably keeps it just as fast.
663 if (Load->getSourceAddress() != Store->getAddr())
664 continue;
665 if (Arith->getSrc(0) != Load->getDest())
666 continue;
667 if (Arith->getDest() != Store->getData())
668 continue;
669 if (!canRMW(Arith))
670 continue;
671 if (Func->isVerbose(IceV_RMW)) {
672 Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
673 Load->dump(Func);
674 Str << "\n ";
675 Arith->dump(Func);
676 Str << "\n ";
677 Store->dump(Func);
678 Str << "\n";
679 }
680 Variable *Beacon = Func->makeVariable(IceType_i32);
681 Beacon->setWeight(0);
682 Store->setRmwBeacon(Beacon);
683 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
684 Node->getInsts().insert(I3, BeaconDef);
685 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
686 Func, Arith->getSrc(1), Store->getAddr(), Beacon,
687 Arith->getOp());
688 Node->getInsts().insert(I3, RMW);
jvoung (off chromium) 2015/06/16 17:59:21 Just checking: should this be insert(I3, RMW) or i[…]
Jim Stichnoth 2015/06/17 00:15:40 This is as intended. First we insert the BeaconDe[…]
689 }
690 }
691 }
692 }
693 }
694 }
695
696 namespace {
697
582 // Converts a ConstantInteger32 operand into its constant value, or 698 // Converts a ConstantInteger32 operand into its constant value, or
583 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 699 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
584 uint64_t getConstantMemoryOrder(Operand *Opnd) { 700 uint64_t getConstantMemoryOrder(Operand *Opnd) {
585 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 701 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
586 return Integer->getValue(); 702 return Integer->getValue();
587 return Intrinsics::MemoryOrderInvalid; 703 return Intrinsics::MemoryOrderInvalid;
588 } 704 }
589 705
590 // Determines whether the dest of a Load instruction can be folded 706 // Determines whether the dest of a Load instruction can be folded
591 // into one of the src operands of a 2-operand instruction. This is 707 // into one of the src operands of a 2-operand instruction. This is
(...skipping 3776 matching lines...) Expand 10 before | Expand all | Expand 10 after
4368 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); 4484 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
4369 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); 4485 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
4370 } else if (isVectorType(Ty)) { 4486 } else if (isVectorType(Ty)) {
4371 _storep(legalizeToVar(Value), NewAddr); 4487 _storep(legalizeToVar(Value), NewAddr);
4372 } else { 4488 } else {
4373 Value = legalize(Value, Legal_Reg | Legal_Imm); 4489 Value = legalize(Value, Legal_Reg | Legal_Imm);
4374 _store(Value, NewAddr); 4490 _store(Value, NewAddr);
4375 } 4491 }
4376 } 4492 }
4377 4493
4378 void TargetX8632::doAddressOptStore() { 4494 void TargetX8632::doAddressOptStore() {
jvoung (off chromium) 2015/06/16 17:59:22 At some point, is it worth having a doAddressOptRM[…]
Jim Stichnoth 2015/06/17 00:15:40 Yikes! I didn't mean to disable address mode opti[…]
jvoung (off chromium) 2015/06/17 16:40:22 Could add a test case to the .ll that shows addres[…]
Jim Stichnoth 2015/06/18 05:15:12 Another good catch! I forgot that the address mod[…]
4379 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); 4495 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
4380 Operand *Data = Inst->getData(); 4496 Operand *Data = Inst->getData();
4381 Operand *Addr = Inst->getAddr(); 4497 Operand *Addr = Inst->getAddr();
4382 Variable *Index = nullptr; 4498 Variable *Index = nullptr;
4383 uint16_t Shift = 0; 4499 uint16_t Shift = 0;
4384 int32_t Offset = 0; // TODO: make Constant 4500 int32_t Offset = 0; // TODO: make Constant
4385 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4501 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4386 // Vanilla ICE store instructions should not use the segment registers, 4502 // Vanilla ICE store instructions should not use the segment registers,
4387 // and computeAddressOpt only works at the level of Variables and Constants, 4503 // and computeAddressOpt only works at the level of Variables and Constants,
4388 // not other OperandX8632Mem, so there should be no mention of segment 4504 // not other OperandX8632Mem, so there should be no mention of segment
4389 // registers there either. 4505 // registers there either.
4390 const OperandX8632Mem::SegmentRegisters SegmentReg = 4506 const OperandX8632Mem::SegmentRegisters SegmentReg =
4391 OperandX8632Mem::DefaultSegment; 4507 OperandX8632Mem::DefaultSegment;
4392 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4508 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4393 if (Base && Addr != Base) { 4509 if (Base && Addr != Base) {
4394 Inst->setDeleted(); 4510 Inst->setDeleted();
4395 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4511 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4396 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4512 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
4397 Shift, SegmentReg); 4513 Shift, SegmentReg);
4398 Context.insert(InstStore::create(Func, Data, Addr)); 4514 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4515 if (Inst->getDest())
4516 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4517 Context.insert(NewStore);
4399 } 4518 }
4400 } 4519 }
4401 4520
4402 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4521 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4403 // This implements the most naive possible lowering. 4522 // This implements the most naive possible lowering.
4404 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4523 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4405 Operand *Src0 = Inst->getComparison(); 4524 Operand *Src0 = Inst->getComparison();
4406 SizeT NumCases = Inst->getNumCases(); 4525 SizeT NumCases = Inst->getNumCases();
4407 if (Src0->getType() == IceType_i64) { 4526 if (Src0->getType() == IceType_i64) {
4408 Src0 = legalize(Src0); // get Base/Index into physical registers 4527 Src0 = legalize(Src0); // get Base/Index into physical registers
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
4491 NextCast->setDeleted(); 4610 NextCast->setDeleted();
4492 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); 4611 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4493 // Skip over the instruction. 4612 // Skip over the instruction.
4494 Context.advanceNext(); 4613 Context.advanceNext();
4495 } 4614 }
4496 } 4615 }
4497 } 4616 }
4498 4617
4499 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } 4618 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }
4500 4619
4620 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {
4621 // If the beacon variable's live range does not end in this
4622 // instruction, then it must end in the modified Store instruction
4623 // that follows. This means that the original Store instruction is
4624 // still there, either because the value being stored is used beyond
4625 // the Store instruction, or because dead code elimination did not
4626 // happen. In either case, we cancel RMW lowering (and the caller
4627 // deletes the RMW instruction).
4628 if (!RMW->isLastUse(RMW->getBeacon()))
4629 return;
4630 Operand *Src = RMW->getData();
4631 Type Ty = Src->getType();
4632 Operand *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4633 if (Ty == IceType_i64) {
4634 // TODO(stichnot): Implement.
4635 } else if (isVectorType(Ty)) {
4636 // TODO(stichnot): Implement.
4637 } else {
4638 // i8, i16, i32, f32, f64
4639 switch (RMW->getOp()) {
4640 default:
4641 // TODO(stichnot): Implement other arithmetic operators.
4642 break;
4643 case InstArithmetic::Add:
4644 Src = legalize(Src, Legal_Reg | Legal_Imm);
4645 _add_rmw(Addr, Src);
4646 return;
4647 }
4648 }
4649 llvm::report_fatal_error("Couldn't lower RMW instruction");
4650 }
4651
4652 void TargetX8632::lowerOther(const Inst *Instr) {
4653 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
4654 lowerRMW(RMW);
4655 } else {
4656 TargetLowering::lowerOther(Instr);
4657 }
4658 }
4659
4501 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4660 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4502 // preserve integrity of liveness analysis. Undef values are also 4661 // preserve integrity of liveness analysis. Undef values are also
4503 // turned into zeroes, since loOperand() and hiOperand() don't expect 4662 // turned into zeroes, since loOperand() and hiOperand() don't expect
4504 // Undef input. 4663 // Undef input.
4505 void TargetX8632::prelowerPhis() { 4664 void TargetX8632::prelowerPhis() {
4506 CfgNode *Node = Context.getNode(); 4665 CfgNode *Node = Context.getNode();
4507 for (Inst &I : Node->getPhis()) { 4666 for (Inst &I : Node->getPhis()) {
4508 auto Phi = llvm::dyn_cast<InstPhi>(&I); 4667 auto Phi = llvm::dyn_cast<InstPhi>(&I);
4509 if (Phi->isDeleted()) 4668 if (Phi->isDeleted())
4510 continue; 4669 continue;
(...skipping 604 matching lines...) Expand 10 before | Expand all | Expand 10 after
5115 emitConstantPool<PoolTypeConverter<float>>(Ctx); 5274 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5116 emitConstantPool<PoolTypeConverter<double>>(Ctx); 5275 emitConstantPool<PoolTypeConverter<double>>(Ctx);
5117 } break; 5276 } break;
5118 } 5277 }
5119 } 5278 }
5120 5279
5121 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) 5280 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
5122 : TargetHeaderLowering(Ctx) {} 5281 : TargetHeaderLowering(Ctx) {}
5123 5282
5124 } // end of namespace Ice 5283 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698