Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1234803007: Introduction of improved switch lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Safer check for 0 size type Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceTypes.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 11 matching lines...) Expand all
22 #include "IceClFlags.h" 22 #include "IceClFlags.h"
23 #include "IceDefs.h" 23 #include "IceDefs.h"
24 #include "IceELFObjectWriter.h" 24 #include "IceELFObjectWriter.h"
25 #include "IceGlobalInits.h" 25 #include "IceGlobalInits.h"
26 #include "IceLiveness.h" 26 #include "IceLiveness.h"
27 #include "IceOperand.h" 27 #include "IceOperand.h"
28 #include "IcePhiLoweringImpl.h" 28 #include "IcePhiLoweringImpl.h"
29 #include "IceUtils.h" 29 #include "IceUtils.h"
30 #include "llvm/Support/MathExtras.h" 30 #include "llvm/Support/MathExtras.h"
31 31
32 #include <stack>
33
32 namespace Ice { 34 namespace Ice {
33 namespace X86Internal { 35 namespace X86Internal {
34 36
35 /// A helper class to ease the settings of RandomizationPoolingPause to disable 37 /// A helper class to ease the settings of RandomizationPoolingPause to disable
36 /// constant blinding or pooling for some translation phases. 38 /// constant blinding or pooling for some translation phases.
37 class BoolFlagSaver { 39 class BoolFlagSaver {
38 BoolFlagSaver() = delete; 40 BoolFlagSaver() = delete;
39 BoolFlagSaver(const BoolFlagSaver &) = delete; 41 BoolFlagSaver(const BoolFlagSaver &) = delete;
40 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; 42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
41 43
(...skipping 4445 matching lines...) Expand 10 before | Expand all | Expand 10 after
4487 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, 4489 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
4488 Index, Shift, SegmentReg); 4490 Index, Shift, SegmentReg);
4489 InstStore *NewStore = InstStore::create(Func, Data, Addr); 4491 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4490 if (Inst->getDest()) 4492 if (Inst->getDest())
4491 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 4493 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4492 Context.insert(NewStore); 4494 Context.insert(NewStore);
4493 } 4495 }
4494 } 4496 }
4495 4497
4496 template <class Machine> 4498 template <class Machine>
4499 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
4500 uint64_t Min, uint64_t Max) {
4501 // TODO(ascull): 64-bit should not reach here but only because it is not
4502 // implemented yet. This should be able to handle the 64-bit case.
4503 assert(Comparison->getType() != IceType_i64);
4504 // Subtracting 0 is a nop so don't do it
4505 if (Min != 0) {
4506 // Avoid clobbering the comparison by copying it
4507 Variable *T = nullptr;
4508 _mov(T, Comparison);
4509 _sub(T, Ctx->getConstantInt32(Min));
4510 Comparison = T;
4511 }
4512
4513 _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
4514
4515 return Comparison;
4516 }
4517
4518 template <class Machine>
4519 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case,
4520 Operand *Comparison, bool DoneCmp,
4521 CfgNode *DefaultLabel) {
4522 switch (Case.getKind()) {
4523 case CaseCluster::JumpTable: {
4524 typename Traits::Insts::Label *SkipJumpTable;
4525
4526 Operand *RangeIndex =
4527 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
4528 if (DefaultLabel != nullptr) {
4529 _br(Traits::Cond::Br_a, DefaultLabel);
4530 } else {
4531 // Skip over jump table logic if comparison not in range and no default
4532 SkipJumpTable = Traits::Insts::Label::create(Func, this);
4533 _br(Traits::Cond::Br_a, SkipJumpTable);
4534 }
4535
4536 InstJumpTable *JumpTable = Case.getJumpTable();
4537 Context.insert(JumpTable);
4538
4539 // Make sure the index is a register of the same width as the base
4540 Variable *Index;
4541 if (RangeIndex->getType() != getPointerType()) {
4542 Index = makeReg(getPointerType());
4543 _movzx(Index, RangeIndex);
4544 } else {
4545 Index = legalizeToVar(RangeIndex);
4546 }
4547
4548 constexpr RelocOffsetT RelocOffset = 0;
4549 constexpr bool SuppressMangling = true;
4550 Constant *Base = Ctx->getConstantSym(RelocOffset, JumpTable->getName(Func),
4551 SuppressMangling);
4552 Constant *Offset = nullptr;
4553 uint16_t Shift = typeWidthInBytesLog2(getPointerType());
4554 // TODO(ascull): remove need for legalize by allowing null base in memop
4555 auto *MemTarget = Traits::X86OperandMem::create(
4556 Func, getPointerType(), legalizeToVar(Base), Offset, Index, Shift);
4557 Variable *Target = nullptr;
4558 _mov(Target, MemTarget);
4559 _jmp(Target);
4560 // TODO(ascull): sandboxing for indirect jump
4561
4562 if (DefaultLabel == nullptr)
4563 Context.insert(SkipJumpTable);
4564 return;
4565 }
4566 case CaseCluster::Range: {
4567 if (Case.getHigh() == Case.getLow()) {
4568 // Single item
4569 Constant *Value = Ctx->getConstantInt32(Case.getLow());
4570 if (!DoneCmp)
4571 _cmp(Comparison, Value);
4572 _br(Traits::Cond::Br_e, Case.getLabel());
4573 if (DefaultLabel != nullptr)
4574 _br(DefaultLabel);
4575 } else {
4576 // Range
4577 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
4578 _br(Traits::Cond::Br_be, Case.getLabel());
4579 if (DefaultLabel != nullptr)
4580 _br(DefaultLabel);
4581 }
4582 return;
4583 }
4584 }
4585 }
4586
/// Lower a switch instruction. Unless the advanced-switch-lowering flag is
/// set, emits the naive compare-and-branch chain. Otherwise cases are grouped
/// into clusters (ranges / jump tables) and navigated with an emitted binary
/// search over the sorted clusters.
template <class Machine>
void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
  // Do it the old fashioned way unless asked for the advanced method
  if (!Ctx->getFlags().getUseAdvancedSwitchLowering()) {
    // This implements the most naive possible lowering.
    // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
    Operand *Src0 = Inst->getComparison();
    SizeT NumCases = Inst->getNumCases();
    if (Src0->getType() == IceType_i64) {
      Src0 = legalize(Src0); // get Base/Index into physical registers
      Operand *Src0Lo = loOperand(Src0);
      Operand *Src0Hi = hiOperand(Src0);
      if (NumCases >= 2) {
        // Multiple uses: pin both halves in registers.
        Src0Lo = legalizeToVar(Src0Lo);
        Src0Hi = legalizeToVar(Src0Hi);
      } else {
        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      }
      // Each 64-bit case is two 32-bit compares: mismatch on the low half
      // short-circuits to the per-case label; a full match branches to the
      // case target.
      for (SizeT I = 0; I < NumCases; ++I) {
        Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
        Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
        typename Traits::Insts::Label *Label =
            Traits::Insts::Label::create(Func, this);
        _cmp(Src0Lo, ValueLo);
        _br(Traits::Cond::Br_ne, Label);
        _cmp(Src0Hi, ValueHi);
        _br(Traits::Cond::Br_e, Inst->getLabel(I));
        Context.insert(Label);
      }
      _br(Inst->getLabelDefault());
      return;
    }
    // OK, we'll be slightly less naive by forcing Src into a physical
    // register if there are 2 or more uses.
    if (NumCases >= 2)
      Src0 = legalizeToVar(Src0);
    else
      Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
    for (SizeT I = 0; I < NumCases; ++I) {
      Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
      _cmp(Src0, Value);
      _br(Traits::Cond::Br_e, Inst->getLabel(I));
    }

    _br(Inst->getLabelDefault());
    return;
  }

  // Group cases together and navigate through them with a binary search
  CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
  Operand *Src0 = Inst->getComparison();
  CfgNode *DefaultLabel = Inst->getLabelDefault();

  assert(CaseClusters.size() != 0); // Should always be at least one

  if (Src0->getType() == IceType_i64) {
    Src0 = legalize(Src0); // get Base/Index into physical registers
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    // Clusters are inspected via the last one's high value; if any case
    // value exceeds 32 bits, fall back to the naive pairwise-compare chain.
    if (CaseClusters.back().getHigh() > UINT32_MAX) {
      // TODO(ascull): handle 64-bit case properly (currently naive version)
      // This might be handled by a higher level lowering of switches.
      SizeT NumCases = Inst->getNumCases();
      if (NumCases >= 2) {
        Src0Lo = legalizeToVar(Src0Lo);
        Src0Hi = legalizeToVar(Src0Hi);
      } else {
        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      }
      for (SizeT I = 0; I < NumCases; ++I) {
        Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
        Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
        typename Traits::Insts::Label *Label =
            Traits::Insts::Label::create(Func, this);
        _cmp(Src0Lo, ValueLo);
        _br(Traits::Cond::Br_ne, Label);
        _cmp(Src0Hi, ValueHi);
        _br(Traits::Cond::Br_e, Inst->getLabel(I));
        Context.insert(Label);
      }
      _br(Inst->getLabelDefault());
      return;
    } else {
      // All the values are 32-bit so just check the operand is too and then
      // fall through to the 32-bit implementation. This is a common case.
      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      Constant *Zero = Ctx->getConstantInt32(0);
      _cmp(Src0Hi, Zero);
      _br(Traits::Cond::Br_ne, DefaultLabel);
      Src0 = Src0Lo;
    }
  }

  // 32-bit lowering

  if (CaseClusters.size() == 1) {
    // Jump straight to default if needed. Currently a common case as jump
    // tables occur on their own.
    constexpr bool DoneCmp = false;
    lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultLabel);
    return;
  }

  // Going to be using multiple times so get it in a register early
  Variable *Comparison = legalizeToVar(Src0);

  // A span is over the clusters
  struct SearchSpan {
    SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label)
        : Begin(Begin), Size(Size), Label(Label) {}

    SizeT Begin; // index of first cluster in the span
    SizeT Size;  // number of clusters in the span
    // Label to emit before lowering this span; nullptr means the span is
    // reached by fall-through from the previously emitted code.
    typename Traits::Insts::Label *Label;
  };
  // Explicit work stack (iterative binary search) using the CFG-local
  // allocator for the underlying deque storage.
  // NOTE(review): this relies on <stack>; <deque> appears to arrive
  // transitively — consider including it explicitly.
  std::stack<SearchSpan, std::deque<SearchSpan, CfgLocalAllocator<SearchSpan>>>
      SearchSpanStack;
  SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
  bool DoneCmp = false;

  while (!SearchSpanStack.empty()) {
    SearchSpan Span = SearchSpanStack.top();
    SearchSpanStack.pop();

    if (Span.Label != nullptr)
      Context.insert(Span.Label);

    switch (Span.Size) {
    case 0:
      llvm::report_fatal_error("Invalid SearchSpan size");
      break;

    case 1:
      // Last pending span gets the default label so a miss exits the search;
      // otherwise fall through to the next emitted span.
      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultLabel);
      DoneCmp = false;
      break;

    case 2:
      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp);
      DoneCmp = false;
      lowerCaseCluster(CaseClusters[Span.Begin + 1], Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultLabel);
      break;

    default:
      // Pick the middle item and branch b or ae
      SizeT PivotIndex = Span.Begin + (Span.Size / 2);
      const CaseCluster &Pivot = CaseClusters[PivotIndex];
      Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
      // TODO(ascull): what if this jump is too big?
      typename Traits::Insts::Label *Label =
          Traits::Insts::Label::create(Func, this);
      _cmp(Comparison, Value);
      _br(Traits::Cond::Br_b, Label);
      // Lower the left and (pivot+right) sides, falling through to the right
      SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
      SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
      // The pivot compare just emitted doubles as the first compare of the
      // next (pivot) span.
      DoneCmp = true;
      break;
    }
  }

  _br(DefaultLabel);
}
4542 template <class Machine>
4543 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4756 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4544 Variable *Dest, Operand *Src0, 4757 Variable *Dest, Operand *Src0,
4545 Operand *Src1) { 4758 Operand *Src1) {
4546 assert(isVectorType(Dest->getType())); 4759 assert(isVectorType(Dest->getType()));
4547 Type Ty = Dest->getType(); 4760 Type Ty = Dest->getType();
4548 Type ElementTy = typeElementType(Ty); 4761 Type ElementTy = typeElementType(Ty);
4549 SizeT NumElements = typeNumElements(Ty); 4762 SizeT NumElements = typeNumElements(Ty);
4550 4763
4551 Operand *T = Ctx->getConstantUndef(Ty); 4764 Operand *T = Ctx->getConstantUndef(Ty);
4552 for (SizeT I = 0; I < NumElements; ++I) { 4765 for (SizeT I = 0; I < NumElements; ++I) {
(...skipping 853 matching lines...) Expand 10 before | Expand all | Expand 10 after
5406 } 5619 }
5407 // the offset is not eligible for blinding or pooling, return the original 5620 // the offset is not eligible for blinding or pooling, return the original
5408 // mem operand 5621 // mem operand
5409 return MemOperand; 5622 return MemOperand;
5410 } 5623 }
5411 5624
5412 } // end of namespace X86Internal 5625 } // end of namespace X86Internal
5413 } // end of namespace Ice 5626 } // end of namespace Ice
5414 5627
5415 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5628 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceTypes.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698