OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 443 matching lines...) | |
454 return; | 454 return; |
455 Func->placePhiStores(); | 455 Func->placePhiStores(); |
456 if (Func->hasError()) | 456 if (Func->hasError()) |
457 return; | 457 return; |
458 Func->deletePhis(); | 458 Func->deletePhis(); |
459 if (Func->hasError()) | 459 if (Func->hasError()) |
460 return; | 460 return; |
461 Func->dump("After Phi lowering"); | 461 Func->dump("After Phi lowering"); |
462 } | 462 } |
463 | 463 |
464 findRMW(); | |
465 | |
464 // Address mode optimization. | 466 // Address mode optimization. |
465 Func->getVMetadata()->init(VMK_SingleDefs); | 467 Func->getVMetadata()->init(VMK_SingleDefs); |
466 Func->doAddressOpt(); | 468 Func->doAddressOpt(); |
467 | 469 |
468 // Argument lowering | 470 // Argument lowering |
469 Func->doArgLowering(); | 471 Func->doArgLowering(); |
470 | 472 |
471 // Target lowering. This requires liveness analysis for some parts | 473 // Target lowering. This requires liveness analysis for some parts |
472 // of the lowering decisions, such as compare/branch fusing. If | 474 // of the lowering decisions, such as compare/branch fusing. If |
473 // non-lightweight liveness analysis is used, the instructions need | 475 // non-lightweight liveness analysis is used, the instructions need |
(...skipping 98 matching lines...) | |
572 Func->dump("After stack frame mapping"); | 574 Func->dump("After stack frame mapping"); |
573 | 575 |
574 // Nop insertion | 576 // Nop insertion |
575 if (Ctx->getFlags().shouldDoNopInsertion()) { | 577 if (Ctx->getFlags().shouldDoNopInsertion()) { |
576 Func->doNopInsertion(); | 578 Func->doNopInsertion(); |
577 } | 579 } |
578 } | 580 } |
579 | 581 |
580 namespace { | 582 namespace { |
581 | 583 |
584 bool canRMW(const InstArithmetic *Arith) { | |
585 Type Ty = Arith->getDest()->getType(); | |
586 bool isI64 = Ty == IceType_i64; | |
587 bool isVector = isVectorType(Ty); | |
588 | |
589 switch (Arith->getOp()) { | |
590 default: | |
591 return false; | |
592 case InstArithmetic::Add: | |
593 return !isI64 && !isVector; // TODO(stichnot): implement | |
jvoung (off chromium)
2015/06/16 17:59:22
not TODO for Add anymore?
Jim Stichnoth
2015/06/17 00:15:40
I meant implement the i64 and vector variants. Cl
| |
594 case InstArithmetic::Sub: | |
595 case InstArithmetic::And: | |
596 case InstArithmetic::Or: | |
597 case InstArithmetic::Xor: | |
598 case InstArithmetic::Fadd: | |
599 case InstArithmetic::Fsub: | |
600 case InstArithmetic::Fmul: | |
601 case InstArithmetic::Fdiv: | |
602 return false; // TODO(stichnot): implement | |
603 return true; | |
604 case InstArithmetic::Shl: | |
605 case InstArithmetic::Lshr: | |
606 case InstArithmetic::Ashr: | |
607 return false; // TODO(stichnot): implement | |
608 return !isI64 && !isVector; | |
609 } | |
610 // Not handled for lack of simple lowering: | |
611 // shift on i64 and vectors | |
jvoung (off chromium)
2015/06/16 17:59:22
Maybe put this earlier, to explain the "return !is
Jim Stichnoth
2015/06/17 00:15:40
Done.
| |
612 // mul, udiv, urem, sdiv, srem, frem | |
613 } | |
614 | |
615 } // end of anonymous namespace | |
616 | |
617 void TargetX8632::findRMW() { | |
618 OstreamLocker L(Func->getContext()); | |
619 Ostream &Str = Func->getContext()->getStrDump(); | |
620 for (CfgNode *Node : Func->getNodes()) { | |
621 // Walk through the instructions, considering each sequence of 3 | |
622 // instructions, and look for the particular RMW pattern. Note that this | |
623 // search can be "broken" (false negatives) if there are intervening deleted | |
624 // instructions, or intervening instructions that could be safely moved out | |
625 // of the way to reveal an RMW pattern. | |
626 auto E = Node->getInsts().end(); | |
627 auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); | |
628 for (; I3 != E; I1 = I2, I2 = I3, ++I3) { | |
629 if (I1 == E || I2 == E) | |
630 continue; | |
631 if (I1->isDeleted() || I2->isDeleted() || I3->isDeleted()) | |
632 continue; | |
633 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { | |
634 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { | |
635 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { | |
636 // Look for: | |
637 // a = Load addr | |
638 // b = <op> a, other | |
639 // Store b, addr | |
640 // Change to: | |
641 // a = Load addr | |
642 // b = <op> a, other | |
643 // x = FakeDef | |
644 // RMW <op>, addr, other, x | |
645 // b = Store b, addr, x | |
jvoung (off chromium)
2015/06/16 17:59:21
Is this redefinition of b from " b = Store b, addr
Jim Stichnoth
2015/06/17 00:15:40
Yes, clarified in the comment.
| |
646 // | |
647 // With this transformation, the Store instruction acquires a Dest | |
648 // variable and is now subject to dead code elimination if there are | |
649 // no more uses of "b". Variable "x" is a beacon for determining | |
650 // whether the Store instruction gets dead-code eliminated. If the | |
651 // Store instruction is eliminated, then it must be the case that | |
652 // the RMW instruction ends x's live range, and therefore the RMW | |
653 // instruction will be retained and later lowered. On the other | |
654 // hand, if the RMW instruction does not end x's live range, then | |
655 // the Store instruction must still be present, and therefore the | |
656 // RMW instruction is ignored during lowering because it is | |
657 // redundant with the Store instruction. | |
658 // | |
659 // Note that if "a" has further uses, the RMW transformation may | |
660 // still trigger, resulting in two loads and one store, which is | |
661 // worse than the original one load and one store. However, this is | |
662 // probably rare, and caching probably keeps it just as fast. | |
663 if (Load->getSourceAddress() != Store->getAddr()) | |
664 continue; | |
665 if (Arith->getSrc(0) != Load->getDest()) | |
666 continue; | |
667 if (Arith->getDest() != Store->getData()) | |
668 continue; | |
669 if (!canRMW(Arith)) | |
670 continue; | |
671 if (Func->isVerbose(IceV_RMW)) { | |
672 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; | |
673 Load->dump(Func); | |
674 Str << "\n "; | |
675 Arith->dump(Func); | |
676 Str << "\n "; | |
677 Store->dump(Func); | |
678 Str << "\n"; | |
679 } | |
680 Variable *Beacon = Func->makeVariable(IceType_i32); | |
681 Beacon->setWeight(0); | |
682 Store->setRmwBeacon(Beacon); | |
683 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); | |
684 Node->getInsts().insert(I3, BeaconDef); | |
685 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( | |
686 Func, Arith->getSrc(1), Store->getAddr(), Beacon, | |
687 Arith->getOp()); | |
688 Node->getInsts().insert(I3, RMW); | |
jvoung (off chromium)
2015/06/16 17:59:21
Just checking: should this be insert(I3, RMW) or i
Jim Stichnoth
2015/06/17 00:15:40
This is as intended. First we insert the BeaconDe
| |
689 } | |
690 } | |
691 } | |
692 } | |
693 } | |
694 } | |
695 | |
696 namespace { | |
697 | |
582 // Converts a ConstantInteger32 operand into its constant value, or | 698 // Converts a ConstantInteger32 operand into its constant value, or |
583 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 699 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
584 uint64_t getConstantMemoryOrder(Operand *Opnd) { | 700 uint64_t getConstantMemoryOrder(Operand *Opnd) { |
585 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 701 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
586 return Integer->getValue(); | 702 return Integer->getValue(); |
587 return Intrinsics::MemoryOrderInvalid; | 703 return Intrinsics::MemoryOrderInvalid; |
588 } | 704 } |
589 | 705 |
590 // Determines whether the dest of a Load instruction can be folded | 706 // Determines whether the dest of a Load instruction can be folded |
591 // into one of the src operands of a 2-operand instruction. This is | 707 // into one of the src operands of a 2-operand instruction. This is |
(...skipping 3776 matching lines...) | |
4368 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 4484 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); |
4369 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 4485 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); |
4370 } else if (isVectorType(Ty)) { | 4486 } else if (isVectorType(Ty)) { |
4371 _storep(legalizeToVar(Value), NewAddr); | 4487 _storep(legalizeToVar(Value), NewAddr); |
4372 } else { | 4488 } else { |
4373 Value = legalize(Value, Legal_Reg | Legal_Imm); | 4489 Value = legalize(Value, Legal_Reg | Legal_Imm); |
4374 _store(Value, NewAddr); | 4490 _store(Value, NewAddr); |
4375 } | 4491 } |
4376 } | 4492 } |
4377 | 4493 |
4378 void TargetX8632::doAddressOptStore() { | 4494 void TargetX8632::doAddressOptStore() { |
jvoung (off chromium)
2015/06/16 17:59:22
At some point, is it worth having a doAddressOptRM
Jim Stichnoth
2015/06/17 00:15:40
Yikes! I didn't mean to disable address mode opti
jvoung (off chromium)
2015/06/17 16:40:22
Could add a test case to the .ll that shows addres
Jim Stichnoth
2015/06/18 05:15:12
Another good catch! I forgot that the address mod
| |
4379 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); | 4495 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); |
4380 Operand *Data = Inst->getData(); | 4496 Operand *Data = Inst->getData(); |
4381 Operand *Addr = Inst->getAddr(); | 4497 Operand *Addr = Inst->getAddr(); |
4382 Variable *Index = nullptr; | 4498 Variable *Index = nullptr; |
4383 uint16_t Shift = 0; | 4499 uint16_t Shift = 0; |
4384 int32_t Offset = 0; // TODO: make Constant | 4500 int32_t Offset = 0; // TODO: make Constant |
4385 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 4501 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
4386 // Vanilla ICE store instructions should not use the segment registers, | 4502 // Vanilla ICE store instructions should not use the segment registers, |
4387 // and computeAddressOpt only works at the level of Variables and Constants, | 4503 // and computeAddressOpt only works at the level of Variables and Constants, |
4388 // not other OperandX8632Mem, so there should be no mention of segment | 4504 // not other OperandX8632Mem, so there should be no mention of segment |
4389 // registers there either. | 4505 // registers there either. |
4390 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4506 const OperandX8632Mem::SegmentRegisters SegmentReg = |
4391 OperandX8632Mem::DefaultSegment; | 4507 OperandX8632Mem::DefaultSegment; |
4392 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4508 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
4393 if (Base && Addr != Base) { | 4509 if (Base && Addr != Base) { |
4394 Inst->setDeleted(); | 4510 Inst->setDeleted(); |
4395 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | 4511 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
4396 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, | 4512 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
4397 Shift, SegmentReg); | 4513 Shift, SegmentReg); |
4398 Context.insert(InstStore::create(Func, Data, Addr)); | 4514 InstStore *NewStore = InstStore::create(Func, Data, Addr); |
4515 if (Inst->getDest()) | |
4516 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | |
4517 Context.insert(NewStore); | |
4399 } | 4518 } |
4400 } | 4519 } |
4401 | 4520 |
4402 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | 4521 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { |
4403 // This implements the most naive possible lowering. | 4522 // This implements the most naive possible lowering. |
4404 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | 4523 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default |
4405 Operand *Src0 = Inst->getComparison(); | 4524 Operand *Src0 = Inst->getComparison(); |
4406 SizeT NumCases = Inst->getNumCases(); | 4525 SizeT NumCases = Inst->getNumCases(); |
4407 if (Src0->getType() == IceType_i64) { | 4526 if (Src0->getType() == IceType_i64) { |
4408 Src0 = legalize(Src0); // get Base/Index into physical registers | 4527 Src0 = legalize(Src0); // get Base/Index into physical registers |
(...skipping 82 matching lines...) | |
4491 NextCast->setDeleted(); | 4610 NextCast->setDeleted(); |
4492 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); | 4611 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
4493 // Skip over the instruction. | 4612 // Skip over the instruction. |
4494 Context.advanceNext(); | 4613 Context.advanceNext(); |
4495 } | 4614 } |
4496 } | 4615 } |
4497 } | 4616 } |
4498 | 4617 |
4499 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } | 4618 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } |
4500 | 4619 |
4620 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { | |
4621 // If the beacon variable's live range does not end in this | |
4622 // instruction, then it must end in the modified Store instruction | |
4623 // that follows. This means that the original Store instruction is | |
4624 // still there, either because the value being stored is used beyond | |
4625 // the Store instruction, or because dead code elimination did not | |
4626 // happen. In either case, we cancel RMW lowering (and the caller | |
4627 // deletes the RMW instruction). | |
4628 if (!RMW->isLastUse(RMW->getBeacon())) | |
4629 return; | |
4630 Operand *Src = RMW->getData(); | |
4631 Type Ty = Src->getType(); | |
4632 Operand *Addr = formMemoryOperand(RMW->getAddr(), Ty); | |
4633 if (Ty == IceType_i64) { | |
4634 // TODO(stichnot): Implement. | |
4635 } else if (isVectorType(Ty)) { | |
4636 // TODO(stichnot): Implement. | |
4637 } else { | |
4638 // i8, i16, i32, f32, f64 | |
4639 switch (RMW->getOp()) { | |
4640 default: | |
4641 // TODO(stichnot): Implement other arithmetic operators. | |
4642 break; | |
4643 case InstArithmetic::Add: | |
4644 Src = legalize(Src, Legal_Reg | Legal_Imm); | |
4645 _add_rmw(Addr, Src); | |
4646 return; | |
4647 } | |
4648 } | |
4649 llvm::report_fatal_error("Couldn't lower RMW instruction"); | |
4650 } | |
4651 | |
4652 void TargetX8632::lowerOther(const Inst *Instr) { | |
4653 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { | |
4654 lowerRMW(RMW); | |
4655 } else { | |
4656 TargetLowering::lowerOther(Instr); | |
4657 } | |
4658 } | |
4659 | |
4501 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4660 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
4502 // preserve integrity of liveness analysis. Undef values are also | 4661 // preserve integrity of liveness analysis. Undef values are also |
4503 // turned into zeroes, since loOperand() and hiOperand() don't expect | 4662 // turned into zeroes, since loOperand() and hiOperand() don't expect |
4504 // Undef input. | 4663 // Undef input. |
4505 void TargetX8632::prelowerPhis() { | 4664 void TargetX8632::prelowerPhis() { |
4506 CfgNode *Node = Context.getNode(); | 4665 CfgNode *Node = Context.getNode(); |
4507 for (Inst &I : Node->getPhis()) { | 4666 for (Inst &I : Node->getPhis()) { |
4508 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 4667 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
4509 if (Phi->isDeleted()) | 4668 if (Phi->isDeleted()) |
4510 continue; | 4669 continue; |
(...skipping 604 matching lines...) | |
5115 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 5274 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
5116 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 5275 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
5117 } break; | 5276 } break; |
5118 } | 5277 } |
5119 } | 5278 } |
5120 | 5279 |
5121 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) | 5280 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) |
5122 : TargetHeaderLowering(Ctx) {} | 5281 : TargetHeaderLowering(Ctx) {} |
5123 | 5282 |
5124 } // end of namespace Ice | 5283 } // end of namespace Ice |
OLD | NEW |