Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(359)

Side by Side Diff: src/IceTargetLowering.cpp

Issue 1738443002: Subzero. Performance tweaks. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments -- all of them Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLowering.h ('k') | src/IceTargetLoweringARM32.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 //===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===// 1 //===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
111 111
112 Variable *LoweringContext::availabilityGet(Operand *Src) const { 112 Variable *LoweringContext::availabilityGet(Operand *Src) const {
113 assert(Src); 113 assert(Src);
114 if (Src == LastDest) 114 if (Src == LastDest)
115 return LastSrc; 115 return LastSrc;
116 return nullptr; 116 return nullptr;
117 } 117 }
118 118
119 namespace { 119 namespace {
120 120
121 void printRegisterSet(Ostream &Str, const llvm::SmallBitVector &Bitset, 121 void printRegisterSet(Ostream &Str, const SmallBitVector &Bitset,
122 std::function<IceString(RegNumT)> getRegName, 122 std::function<IceString(RegNumT)> getRegName,
123 const IceString &LineIndentString) { 123 const IceString &LineIndentString) {
124 constexpr size_t RegistersPerLine = 16; 124 constexpr size_t RegistersPerLine = 16;
125 size_t Count = 0; 125 size_t Count = 0;
126 for (RegNumT RegNum : RegNumBVIter(Bitset)) { 126 for (RegNumT RegNum : RegNumBVIter(Bitset)) {
127 if (Count == 0) { 127 if (Count == 0) {
128 Str << LineIndentString; 128 Str << LineIndentString;
129 } else { 129 } else {
130 Str << ","; 130 Str << ",";
131 } 131 }
(...skipping 23 matching lines...) Expand all
155 } 155 }
156 156
157 LLVM_ATTRIBUTE_NORETURN void badTargetFatalError(TargetArch Target) { 157 LLVM_ATTRIBUTE_NORETURN void badTargetFatalError(TargetArch Target) {
158 llvm::report_fatal_error("Unsupported target: " + 158 llvm::report_fatal_error("Unsupported target: " +
159 std::string(targetArchString(Target))); 159 std::string(targetArchString(Target)));
160 } 160 }
161 161
162 } // end of anonymous namespace 162 } // end of anonymous namespace
163 163
164 void TargetLowering::filterTypeToRegisterSet( 164 void TargetLowering::filterTypeToRegisterSet(
165 GlobalContext *Ctx, int32_t NumRegs, 165 GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
166 llvm::SmallBitVector TypeToRegisterSet[], size_t TypeToRegisterSetSize, 166 size_t TypeToRegisterSetSize, std::function<IceString(RegNumT)> getRegName,
167 std::function<IceString(RegNumT)> getRegName,
168 std::function<IceString(RegClass)> getRegClassName) { 167 std::function<IceString(RegClass)> getRegClassName) {
169 std::vector<llvm::SmallBitVector> UseSet(TypeToRegisterSetSize, 168 std::vector<SmallBitVector> UseSet(TypeToRegisterSetSize,
170 llvm::SmallBitVector(NumRegs)); 169 SmallBitVector(NumRegs));
171 std::vector<llvm::SmallBitVector> ExcludeSet(TypeToRegisterSetSize, 170 std::vector<SmallBitVector> ExcludeSet(TypeToRegisterSetSize,
172 llvm::SmallBitVector(NumRegs)); 171 SmallBitVector(NumRegs));
173 172
174 std::unordered_map<IceString, RegNumT> RegNameToIndex; 173 std::unordered_map<IceString, RegNumT> RegNameToIndex;
175 for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) { 174 for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) {
176 const auto RegNum = RegNumT::fromInt(RegIndex); 175 const auto RegNum = RegNumT::fromInt(RegIndex);
177 RegNameToIndex[getRegName(RegNum)] = RegNum; 176 RegNameToIndex[getRegName(RegNum)] = RegNum;
178 } 177 }
179 178
180 ClFlags::StringVector BadRegNames; 179 ClFlags::StringVector BadRegNames;
181 180
182 // The processRegList function iterates across the RegNames vector. Each 181 // The processRegList function iterates across the RegNames vector. Each
183 // entry in the vector is a string of the form "<reg>" or "<class>:<reg>". 182 // entry in the vector is a string of the form "<reg>" or "<class>:<reg>".
184 // The register class and register number are computed, and the corresponding 183 // The register class and register number are computed, and the corresponding
185 // bit is set in RegSet[][]. If "<class>:" is missing, then the bit is set 184 // bit is set in RegSet[][]. If "<class>:" is missing, then the bit is set
186 // for all classes. 185 // for all classes.
187 auto processRegList = [&](const ClFlags::StringVector &RegNames, 186 auto processRegList = [&](const ClFlags::StringVector &RegNames,
188 std::vector<llvm::SmallBitVector> &RegSet) { 187 std::vector<SmallBitVector> &RegSet) {
189 for (const IceString &RegClassAndName : RegNames) { 188 for (const IceString &RegClassAndName : RegNames) {
190 IceString RClass; 189 IceString RClass;
191 IceString RName; 190 IceString RName;
192 splitToClassAndName(RegClassAndName, &RClass, &RName); 191 splitToClassAndName(RegClassAndName, &RClass, &RName);
193 if (!RegNameToIndex.count(RName)) { 192 if (!RegNameToIndex.count(RName)) {
194 BadRegNames.push_back(RName); 193 BadRegNames.push_back(RName);
195 continue; 194 continue;
196 } 195 }
197 const int32_t RegIndex = RegNameToIndex.at(RName); 196 const int32_t RegIndex = RegNameToIndex.at(RName);
198 for (SizeT TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; 197 for (SizeT TypeIndex = 0; TypeIndex < TypeToRegisterSetSize;
(...skipping 13 matching lines...) Expand all
212 std::string Buffer; 211 std::string Buffer;
213 llvm::raw_string_ostream StrBuf(Buffer); 212 llvm::raw_string_ostream StrBuf(Buffer);
214 StrBuf << "Unrecognized use/exclude registers:"; 213 StrBuf << "Unrecognized use/exclude registers:";
215 for (const auto &RegName : BadRegNames) 214 for (const auto &RegName : BadRegNames)
216 StrBuf << " " << RegName; 215 StrBuf << " " << RegName;
217 llvm::report_fatal_error(StrBuf.str()); 216 llvm::report_fatal_error(StrBuf.str());
218 } 217 }
219 218
220 // Apply filters. 219 // Apply filters.
221 for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) { 220 for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
222 llvm::SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex]; 221 SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex];
223 llvm::SmallBitVector *UseBitSet = &UseSet[TypeIndex]; 222 SmallBitVector *UseBitSet = &UseSet[TypeIndex];
224 llvm::SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex]; 223 SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex];
225 if (UseBitSet->any()) 224 if (UseBitSet->any())
226 *TypeBitSet = *UseBitSet; 225 *TypeBitSet = *UseBitSet;
227 (*TypeBitSet).reset(*ExcludeBitSet); 226 (*TypeBitSet).reset(*ExcludeBitSet);
228 } 227 }
229 228
230 // Display filtered register sets, if requested. 229 // Display filtered register sets, if requested.
231 if (BuildDefs::dump() && NumRegs && 230 if (BuildDefs::dump() && NumRegs &&
232 (Ctx->getFlags().getVerbose() & IceV_AvailableRegs)) { 231 (Ctx->getFlags().getVerbose() & IceV_AvailableRegs)) {
233 Ostream &Str = Ctx->getStrDump(); 232 Ostream &Str = Ctx->getStrDump();
234 const IceString Indent = " "; 233 const IceString Indent = " ";
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
463 // performance testing. 462 // performance testing.
464 void TargetLowering::regAlloc(RegAllocKind Kind) { 463 void TargetLowering::regAlloc(RegAllocKind Kind) {
465 TimerMarker T(TimerStack::TT_regAlloc, Func); 464 TimerMarker T(TimerStack::TT_regAlloc, Func);
466 LinearScan LinearScan(Func); 465 LinearScan LinearScan(Func);
467 RegSetMask RegInclude = RegSet_None; 466 RegSetMask RegInclude = RegSet_None;
468 RegSetMask RegExclude = RegSet_None; 467 RegSetMask RegExclude = RegSet_None;
469 RegInclude |= RegSet_CallerSave; 468 RegInclude |= RegSet_CallerSave;
470 RegInclude |= RegSet_CalleeSave; 469 RegInclude |= RegSet_CalleeSave;
471 if (hasFramePointer()) 470 if (hasFramePointer())
472 RegExclude |= RegSet_FramePointer; 471 RegExclude |= RegSet_FramePointer;
473 llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude); 472 SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
474 bool Repeat = (Kind == RAK_Global && Ctx->getFlags().shouldRepeatRegAlloc()); 473 bool Repeat = (Kind == RAK_Global && Ctx->getFlags().shouldRepeatRegAlloc());
475 do { 474 do {
476 LinearScan.init(Kind); 475 LinearScan.init(Kind);
477 LinearScan.scan(RegMask, Ctx->getFlags().shouldRandomizeRegAlloc()); 476 LinearScan.scan(RegMask, Ctx->getFlags().shouldRandomizeRegAlloc());
478 if (!LinearScan.hasEvictions()) 477 if (!LinearScan.hasEvictions())
479 Repeat = false; 478 Repeat = false;
480 Kind = RAK_SecondChance; 479 Kind = RAK_SecondChance;
481 } while (Repeat); 480 } while (Repeat);
482 // TODO(stichnot): Run the register allocator one more time to do stack slot 481 // TODO(stichnot): Run the register allocator one more time to do stack slot
483 // coalescing. The idea would be to initialize the Unhandled list with the 482 // coalescing. The idea would be to initialize the Unhandled list with the
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
532 // Instead of std::sort, we could do a bucket sort with log2(alignment) as 531 // Instead of std::sort, we could do a bucket sort with log2(alignment) as
533 // the buckets, if performance is an issue. 532 // the buckets, if performance is an issue.
534 std::sort(Dest.begin(), Dest.end(), 533 std::sort(Dest.begin(), Dest.end(),
535 [this](const Variable *V1, const Variable *V2) { 534 [this](const Variable *V1, const Variable *V2) {
536 return typeWidthInBytesOnStack(V1->getType()) > 535 return typeWidthInBytesOnStack(V1->getType()) >
537 typeWidthInBytesOnStack(V2->getType()); 536 typeWidthInBytesOnStack(V2->getType());
538 }); 537 });
539 } 538 }
540 539
541 void TargetLowering::getVarStackSlotParams( 540 void TargetLowering::getVarStackSlotParams(
542 VarList &SortedSpilledVariables, llvm::SmallBitVector &RegsUsed, 541 VarList &SortedSpilledVariables, SmallBitVector &RegsUsed,
543 size_t *GlobalsSize, size_t *SpillAreaSizeBytes, 542 size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
544 uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes, 543 uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
545 std::function<bool(Variable *)> TargetVarHook) { 544 std::function<bool(Variable *)> TargetVarHook) {
546 const VariablesMetadata *VMetadata = Func->getVMetadata(); 545 const VariablesMetadata *VMetadata = Func->getVMetadata();
547 llvm::BitVector IsVarReferenced(Func->getNumVariables()); 546 llvm::BitVector IsVarReferenced(Func->getNumVariables());
548 for (CfgNode *Node : Func->getNodes()) { 547 for (CfgNode *Node : Func->getNodes()) {
549 for (Inst &Instr : Node->getInsts()) { 548 for (Inst &Instr : Node->getInsts()) {
550 if (Instr.isDeleted()) 549 if (Instr.isDeleted())
551 continue; 550 continue;
552 if (const Variable *Var = Instr.getDest()) 551 if (const Variable *Var = Instr.getDest())
(...skipping 10 matching lines...) Expand all
563 // gets its own slot, but "local" variable slots are reused across basic 562 // gets its own slot, but "local" variable slots are reused across basic
564 // blocks. E.g., if A and B are local to block 1 and C is local to block 2, 563 // blocks. E.g., if A and B are local to block 1 and C is local to block 2,
565 // then C may share a slot with A or B. 564 // then C may share a slot with A or B.
566 // 565 //
567 // We cannot coalesce stack slots if this function calls a "returns twice" 566 // We cannot coalesce stack slots if this function calls a "returns twice"
568 // function. In that case, basic blocks may be revisited, and variables local 567 // function. In that case, basic blocks may be revisited, and variables local
569 // to those basic blocks are actually live until after the called function 568 // to those basic blocks are actually live until after the called function
570 // returns a second time. 569 // returns a second time.
571 const bool SimpleCoalescing = !callsReturnsTwice(); 570 const bool SimpleCoalescing = !callsReturnsTwice();
572 571
573 std::vector<size_t> LocalsSize(Func->getNumNodes()); 572 CfgVector<size_t> LocalsSize(Func->getNumNodes());
574 const VarList &Variables = Func->getVariables(); 573 const VarList &Variables = Func->getVariables();
575 VarList SpilledVariables; 574 VarList SpilledVariables;
576 for (Variable *Var : Variables) { 575 for (Variable *Var : Variables) {
577 if (Var->hasReg()) { 576 if (Var->hasReg()) {
578 // Don't consider a rematerializable variable to be an actual register use 577 // Don't consider a rematerializable variable to be an actual register use
579 // (specifically of the frame pointer). Otherwise, the prolog may decide 578 // (specifically of the frame pointer). Otherwise, the prolog may decide
580 // to save the frame pointer twice - once because of the explicit need for 579 // to save the frame pointer twice - once because of the explicit need for
581 // a frame pointer, and once because of an active use of a callee-save 580 // a frame pointer, and once because of an active use of a callee-save
582 // register. 581 // register.
583 if (!Var->isRematerializable()) 582 if (!Var->isRematerializable())
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
661 // SpillAreaSizeBytes has accounted for the extra test padding. When 660 // SpillAreaSizeBytes has accounted for the extra test padding. When
662 // UseFramePointer is true, the offset depends on the padding, not just the 661 // UseFramePointer is true, the offset depends on the padding, not just the
663 // SpillAreaSizeBytes. On the other hand, when UseFramePointer is false, the 662 // SpillAreaSizeBytes. On the other hand, when UseFramePointer is false, the
664 // offsets depend on the gap between SpillAreaSizeBytes and 663 // offsets depend on the gap between SpillAreaSizeBytes and
665 // SpillAreaPaddingBytes, so we don't increment that. 664 // SpillAreaPaddingBytes, so we don't increment that.
666 size_t TestPadding = Ctx->getFlags().getTestStackExtra(); 665 size_t TestPadding = Ctx->getFlags().getTestStackExtra();
667 if (UsesFramePointer) 666 if (UsesFramePointer)
668 SpillAreaPaddingBytes += TestPadding; 667 SpillAreaPaddingBytes += TestPadding;
669 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes; 668 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
670 size_t NextStackOffset = SpillAreaPaddingBytes; 669 size_t NextStackOffset = SpillAreaPaddingBytes;
671 std::vector<size_t> LocalsSize(Func->getNumNodes()); 670 CfgVector<size_t> LocalsSize(Func->getNumNodes());
672 const bool SimpleCoalescing = !callsReturnsTwice(); 671 const bool SimpleCoalescing = !callsReturnsTwice();
673 672
674 for (Variable *Var : SortedSpilledVariables) { 673 for (Variable *Var : SortedSpilledVariables) {
675 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 674 size_t Increment = typeWidthInBytesOnStack(Var->getType());
676 if (SimpleCoalescing && VMetadata->isTracked(Var)) { 675 if (SimpleCoalescing && VMetadata->isTracked(Var)) {
677 if (VMetadata->isMultiBlock(Var)) { 676 if (VMetadata->isMultiBlock(Var)) {
678 GlobalsSpaceUsed += Increment; 677 GlobalsSpaceUsed += Increment;
679 NextStackOffset = GlobalsSpaceUsed; 678 NextStackOffset = GlobalsSpaceUsed;
680 } else { 679 } else {
681 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex(); 680 SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
884 case Target_##X: \ 883 case Target_##X: \
885 return ::X::createTargetHeaderLowering(Ctx); 884 return ::X::createTargetHeaderLowering(Ctx);
886 #include "llvm/Config/SZTargets.def" 885 #include "llvm/Config/SZTargets.def"
887 #undef SUBZERO_TARGET 886 #undef SUBZERO_TARGET
888 } 887 }
889 } 888 }
890 889
891 TargetHeaderLowering::~TargetHeaderLowering() = default; 890 TargetHeaderLowering::~TargetHeaderLowering() = default;
892 891
893 } // end of namespace Ice 892 } // end of namespace Ice
OLD | NEW
« no previous file with comments | « src/IceTargetLowering.h ('k') | src/IceTargetLoweringARM32.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698