Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 443 matching lines...) | |
| 454 return; | 454 return; |
| 455 Func->placePhiStores(); | 455 Func->placePhiStores(); |
| 456 if (Func->hasError()) | 456 if (Func->hasError()) |
| 457 return; | 457 return; |
| 458 Func->deletePhis(); | 458 Func->deletePhis(); |
| 459 if (Func->hasError()) | 459 if (Func->hasError()) |
| 460 return; | 460 return; |
| 461 Func->dump("After Phi lowering"); | 461 Func->dump("After Phi lowering"); |
| 462 } | 462 } |
| 463 | 463 |
| 464 findRMW(); | |
| 465 | |
| 464 // Address mode optimization. | 466 // Address mode optimization. |
| 465 Func->getVMetadata()->init(VMK_SingleDefs); | 467 Func->getVMetadata()->init(VMK_SingleDefs); |
| 466 Func->doAddressOpt(); | 468 Func->doAddressOpt(); |
| 467 | 469 |
| 468 // Argument lowering | 470 // Argument lowering |
| 469 Func->doArgLowering(); | 471 Func->doArgLowering(); |
| 470 | 472 |
| 471 // Target lowering. This requires liveness analysis for some parts | 473 // Target lowering. This requires liveness analysis for some parts |
| 472 // of the lowering decisions, such as compare/branch fusing. If | 474 // of the lowering decisions, such as compare/branch fusing. If |
| 473 // non-lightweight liveness analysis is used, the instructions need | 475 // non-lightweight liveness analysis is used, the instructions need |
| (...skipping 98 matching lines...) | |
| 572 Func->dump("After stack frame mapping"); | 574 Func->dump("After stack frame mapping"); |
| 573 | 575 |
| 574 // Nop insertion | 576 // Nop insertion |
| 575 if (Ctx->getFlags().shouldDoNopInsertion()) { | 577 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 576 Func->doNopInsertion(); | 578 Func->doNopInsertion(); |
| 577 } | 579 } |
| 578 } | 580 } |
| 579 | 581 |
| 580 namespace { | 582 namespace { |
| 581 | 583 |
| 584 bool canRMW(const InstArithmetic *Arith) { | |
| 585 Type Ty = Arith->getDest()->getType(); | |
| 586 bool isI64 = Ty == IceType_i64; | |
| 587 bool isVector = isVectorType(Ty); | |
| 588 | |
| 589 switch (Arith->getOp()) { | |
| 590 default: | |
| 591 return false; | |
| 592 case InstArithmetic::Add: | |
| 593 return !isI64 && !isVector; // TODO(stichnot): implement | |
|
jvoung (off chromium)
2015/06/16 17:59:22
not TODO for Add anymore?
Jim Stichnoth
2015/06/17 00:15:40
I meant implement the i64 and vector variants. Cl
| |
| 594 case InstArithmetic::Sub: | |
| 595 case InstArithmetic::And: | |
| 596 case InstArithmetic::Or: | |
| 597 case InstArithmetic::Xor: | |
| 598 case InstArithmetic::Fadd: | |
| 599 case InstArithmetic::Fsub: | |
| 600 case InstArithmetic::Fmul: | |
| 601 case InstArithmetic::Fdiv: | |
| 602 return false; // TODO(stichnot): implement | |
| 603 return true; | |
| 604 case InstArithmetic::Shl: | |
| 605 case InstArithmetic::Lshr: | |
| 606 case InstArithmetic::Ashr: | |
| 607 return false; // TODO(stichnot): implement | |
| 608 return !isI64 && !isVector; | |
| 609 } | |
| 610 // Not handled for lack of simple lowering: | |
| 611 // shift on i64 and vectors | |
|
jvoung (off chromium)
2015/06/16 17:59:22
Maybe put this earlier, to explain the "return !is
Jim Stichnoth
2015/06/17 00:15:40
Done.
| |
| 612 // mul, udiv, urem, sdiv, srem, frem | |
| 613 } | |
| 614 | |
| 615 } // end of anonymous namespace | |
| 616 | |
| 617 void TargetX8632::findRMW() { | |
| 618 OstreamLocker L(Func->getContext()); | |
| 619 Ostream &Str = Func->getContext()->getStrDump(); | |
| 620 for (CfgNode *Node : Func->getNodes()) { | |
| 621 // Walk through the instructions, considering each sequence of 3 | |
| 622 // instructions, and look for the particular RMW pattern. Note that this | |
| 623 // search can be "broken" (false negatives) if there are intervening deleted | |
| 624 // instructions, or intervening instructions that could be safely moved out | |
| 625 // of the way to reveal an RMW pattern. | |
| 626 auto E = Node->getInsts().end(); | |
| 627 auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); | |
| 628 for (; I3 != E; I1 = I2, I2 = I3, ++I3) { | |
| 629 if (I1 == E || I2 == E) | |
| 630 continue; | |
| 631 if (I1->isDeleted() || I2->isDeleted() || I3->isDeleted()) | |
| 632 continue; | |
| 633 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { | |
| 634 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { | |
| 635 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { | |
| 636 // Look for: | |
| 637 // a = Load addr | |
| 638 // b = <op> a, other | |
| 639 // Store b, addr | |
| 640 // Change to: | |
| 641 // a = Load addr | |
| 642 // b = <op> a, other | |
| 643 // x = FakeDef | |
| 644 // RMW <op>, addr, other, x | |
| 645 // b = Store b, addr, x | |
|
jvoung (off chromium)
2015/06/16 17:59:21
Is this redefinition of b from " b = Store b, addr
Jim Stichnoth
2015/06/17 00:15:40
Yes, clarified in the comment.
| |
| 646 // | |
| 647 // With this transformation, the Store instruction acquires a Dest | |
| 648 // variable and is now subject to dead code elimination if there are | |
| 649 // no more uses of "b". Variable "x" is a beacon for determining | |
| 650 // whether the Store instruction gets dead-code eliminated. If the | |
| 651 // Store instruction is eliminated, then it must be the case that | |
| 652 // the RMW instruction ends x's live range, and therefore the RMW | |
| 653 // instruction will be retained and later lowered. On the other | |
| 654 // hand, if the RMW instruction does not end x's live range, then | |
| 655 // the Store instruction must still be present, and therefore the | |
| 656 // RMW instruction is ignored during lowering because it is | |
| 657 // redundant with the Store instruction. | |
| 658 // | |
| 659 // Note that if "a" has further uses, the RMW transformation may | |
| 660 // still trigger, resulting in two loads and one store, which is | |
| 661 // worse than the original one load and one store. However, this is | |
| 662 // probably rare, and caching probably keeps it just as fast. | |
| 663 if (Load->getSourceAddress() != Store->getAddr()) | |
| 664 continue; | |
| 665 if (Arith->getSrc(0) != Load->getDest()) | |
| 666 continue; | |
| 667 if (Arith->getDest() != Store->getData()) | |
| 668 continue; | |
| 669 if (!canRMW(Arith)) | |
| 670 continue; | |
| 671 if (Func->isVerbose(IceV_RMW)) { | |
| 672 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; | |
| 673 Load->dump(Func); | |
| 674 Str << "\n "; | |
| 675 Arith->dump(Func); | |
| 676 Str << "\n "; | |
| 677 Store->dump(Func); | |
| 678 Str << "\n"; | |
| 679 } | |
| 680 Variable *Beacon = Func->makeVariable(IceType_i32); | |
| 681 Beacon->setWeight(0); | |
| 682 Store->setRmwBeacon(Beacon); | |
| 683 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); | |
| 684 Node->getInsts().insert(I3, BeaconDef); | |
| 685 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( | |
| 686 Func, Arith->getSrc(1), Store->getAddr(), Beacon, | |
| 687 Arith->getOp()); | |
| 688 Node->getInsts().insert(I3, RMW); | |
|
jvoung (off chromium)
2015/06/16 17:59:21
Just checking: should this be insert(I3, RMW) or i
Jim Stichnoth
2015/06/17 00:15:40
This is as intended. First we insert the BeaconDe
| |
| 689 } | |
| 690 } | |
| 691 } | |
| 692 } | |
| 693 } | |
| 694 } | |
| 695 | |
| 696 namespace { | |
| 697 | |
| 582 // Converts a ConstantInteger32 operand into its constant value, or | 698 // Converts a ConstantInteger32 operand into its constant value, or |
| 583 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 699 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| 584 uint64_t getConstantMemoryOrder(Operand *Opnd) { | 700 uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 585 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 701 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 586 return Integer->getValue(); | 702 return Integer->getValue(); |
| 587 return Intrinsics::MemoryOrderInvalid; | 703 return Intrinsics::MemoryOrderInvalid; |
| 588 } | 704 } |
| 589 | 705 |
| 590 // Determines whether the dest of a Load instruction can be folded | 706 // Determines whether the dest of a Load instruction can be folded |
| 591 // into one of the src operands of a 2-operand instruction. This is | 707 // into one of the src operands of a 2-operand instruction. This is |
| (...skipping 3776 matching lines...) | |
| 4368 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 4484 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); |
| 4369 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 4485 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); |
| 4370 } else if (isVectorType(Ty)) { | 4486 } else if (isVectorType(Ty)) { |
| 4371 _storep(legalizeToVar(Value), NewAddr); | 4487 _storep(legalizeToVar(Value), NewAddr); |
| 4372 } else { | 4488 } else { |
| 4373 Value = legalize(Value, Legal_Reg | Legal_Imm); | 4489 Value = legalize(Value, Legal_Reg | Legal_Imm); |
| 4374 _store(Value, NewAddr); | 4490 _store(Value, NewAddr); |
| 4375 } | 4491 } |
| 4376 } | 4492 } |
| 4377 | 4493 |
| 4378 void TargetX8632::doAddressOptStore() { | 4494 void TargetX8632::doAddressOptStore() { |
|
jvoung (off chromium)
2015/06/16 17:59:22
At some point, is it worth having a doAddressOptRM
Jim Stichnoth
2015/06/17 00:15:40
Yikes! I didn't mean to disable address mode opti
jvoung (off chromium)
2015/06/17 16:40:22
Could add a test case to the .ll that shows addres
Jim Stichnoth
2015/06/18 05:15:12
Another good catch! I forgot that the address mod
| |
| 4379 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); | 4495 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); |
| 4380 Operand *Data = Inst->getData(); | 4496 Operand *Data = Inst->getData(); |
| 4381 Operand *Addr = Inst->getAddr(); | 4497 Operand *Addr = Inst->getAddr(); |
| 4382 Variable *Index = nullptr; | 4498 Variable *Index = nullptr; |
| 4383 uint16_t Shift = 0; | 4499 uint16_t Shift = 0; |
| 4384 int32_t Offset = 0; // TODO: make Constant | 4500 int32_t Offset = 0; // TODO: make Constant |
| 4385 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 4501 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| 4386 // Vanilla ICE store instructions should not use the segment registers, | 4502 // Vanilla ICE store instructions should not use the segment registers, |
| 4387 // and computeAddressOpt only works at the level of Variables and Constants, | 4503 // and computeAddressOpt only works at the level of Variables and Constants, |
| 4388 // not other OperandX8632Mem, so there should be no mention of segment | 4504 // not other OperandX8632Mem, so there should be no mention of segment |
| 4389 // registers there either. | 4505 // registers there either. |
| 4390 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4506 const OperandX8632Mem::SegmentRegisters SegmentReg = |
| 4391 OperandX8632Mem::DefaultSegment; | 4507 OperandX8632Mem::DefaultSegment; |
| 4392 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4508 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| 4393 if (Base && Addr != Base) { | 4509 if (Base && Addr != Base) { |
| 4394 Inst->setDeleted(); | 4510 Inst->setDeleted(); |
| 4395 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | 4511 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| 4396 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, | 4512 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
| 4397 Shift, SegmentReg); | 4513 Shift, SegmentReg); |
| 4398 Context.insert(InstStore::create(Func, Data, Addr)); | 4514 InstStore *NewStore = InstStore::create(Func, Data, Addr); |
| 4515 if (Inst->getDest()) | |
| 4516 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | |
| 4517 Context.insert(NewStore); | |
| 4399 } | 4518 } |
| 4400 } | 4519 } |
| 4401 | 4520 |
| 4402 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | 4521 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { |
| 4403 // This implements the most naive possible lowering. | 4522 // This implements the most naive possible lowering. |
| 4404 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | 4523 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default |
| 4405 Operand *Src0 = Inst->getComparison(); | 4524 Operand *Src0 = Inst->getComparison(); |
| 4406 SizeT NumCases = Inst->getNumCases(); | 4525 SizeT NumCases = Inst->getNumCases(); |
| 4407 if (Src0->getType() == IceType_i64) { | 4526 if (Src0->getType() == IceType_i64) { |
| 4408 Src0 = legalize(Src0); // get Base/Index into physical registers | 4527 Src0 = legalize(Src0); // get Base/Index into physical registers |
| (...skipping 82 matching lines...) | |
| 4491 NextCast->setDeleted(); | 4610 NextCast->setDeleted(); |
| 4492 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); | 4611 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
| 4493 // Skip over the instruction. | 4612 // Skip over the instruction. |
| 4494 Context.advanceNext(); | 4613 Context.advanceNext(); |
| 4495 } | 4614 } |
| 4496 } | 4615 } |
| 4497 } | 4616 } |
| 4498 | 4617 |
| 4499 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } | 4618 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } |
| 4500 | 4619 |
| 4620 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { | |
| 4621 // If the beacon variable's live range does not end in this | |
| 4622 // instruction, then it must end in the modified Store instruction | |
| 4623 // that follows. This means that the original Store instruction is | |
| 4624 // still there, either because the value being stored is used beyond | |
| 4625 // the Store instruction, or because dead code elimination did not | |
| 4626 // happen. In either case, we cancel RMW lowering (and the caller | |
| 4627 // deletes the RMW instruction). | |
| 4628 if (!RMW->isLastUse(RMW->getBeacon())) | |
| 4629 return; | |
| 4630 Operand *Src = RMW->getData(); | |
| 4631 Type Ty = Src->getType(); | |
| 4632 Operand *Addr = formMemoryOperand(RMW->getAddr(), Ty); | |
| 4633 if (Ty == IceType_i64) { | |
| 4634 // TODO(stichnot): Implement. | |
| 4635 } else if (isVectorType(Ty)) { | |
| 4636 // TODO(stichnot): Implement. | |
| 4637 } else { | |
| 4638 // i8, i16, i32, f32, f64 | |
| 4639 switch (RMW->getOp()) { | |
| 4640 default: | |
| 4641 // TODO(stichnot): Implement other arithmetic operators. | |
| 4642 break; | |
| 4643 case InstArithmetic::Add: | |
| 4644 Src = legalize(Src, Legal_Reg | Legal_Imm); | |
| 4645 _add_rmw(Addr, Src); | |
| 4646 return; | |
| 4647 } | |
| 4648 } | |
| 4649 llvm::report_fatal_error("Couldn't lower RMW instruction"); | |
| 4650 } | |
| 4651 | |
| 4652 void TargetX8632::lowerOther(const Inst *Instr) { | |
| 4653 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { | |
| 4654 lowerRMW(RMW); | |
| 4655 } else { | |
| 4656 TargetLowering::lowerOther(Instr); | |
| 4657 } | |
| 4658 } | |
| 4659 | |
| 4501 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4660 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| 4502 // preserve integrity of liveness analysis. Undef values are also | 4661 // preserve integrity of liveness analysis. Undef values are also |
| 4503 // turned into zeroes, since loOperand() and hiOperand() don't expect | 4662 // turned into zeroes, since loOperand() and hiOperand() don't expect |
| 4504 // Undef input. | 4663 // Undef input. |
| 4505 void TargetX8632::prelowerPhis() { | 4664 void TargetX8632::prelowerPhis() { |
| 4506 CfgNode *Node = Context.getNode(); | 4665 CfgNode *Node = Context.getNode(); |
| 4507 for (Inst &I : Node->getPhis()) { | 4666 for (Inst &I : Node->getPhis()) { |
| 4508 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 4667 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
| 4509 if (Phi->isDeleted()) | 4668 if (Phi->isDeleted()) |
| 4510 continue; | 4669 continue; |
| (...skipping 604 matching lines...) | |
| 5115 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 5274 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
| 5116 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 5275 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
| 5117 } break; | 5276 } break; |
| 5118 } | 5277 } |
| 5119 } | 5278 } |
| 5120 | 5279 |
| 5121 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) | 5280 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) |
| 5122 : TargetHeaderLowering(Ctx) {} | 5281 : TargetHeaderLowering(Ctx) {} |
| 5123 | 5282 |
| 5124 } // end of namespace Ice | 5283 } // end of namespace Ice |
| OLD | NEW |