| OLD | NEW |
| 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// | 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 252 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 263 return true; | 263 return true; |
| 264 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { | 264 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { |
| 265 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) | 265 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) |
| 266 return true; | 266 return true; |
| 267 } | 267 } |
| 268 return false; | 268 return false; |
| 269 } | 269 } |
| 270 | 270 |
| 271 } // end of anonymous namespace | 271 } // end of anonymous namespace |
| 272 | 272 |
| 273 // This the "advanced" version of Phi lowering for a basic block, in | 273 // This the "advanced" version of Phi lowering for a basic block, in contrast to |
| 274 // contrast to the simple version that lowers through assignments | 274 // the simple version that lowers through assignments involving temporaries. |
| 275 // involving temporaries. | |
| 276 // | 275 // |
| 277 // All Phi instructions in a basic block are conceptually executed in | 276 // All Phi instructions in a basic block are conceptually executed in parallel. |
| 278 // parallel. However, if we lower Phis early and commit to a | 277 // However, if we lower Phis early and commit to a sequential ordering, we may |
| 279 // sequential ordering, we may end up creating unnecessary | 278 // end up creating unnecessary interferences which lead to worse register |
| 280 // interferences which lead to worse register allocation. Delaying | 279 // allocation. Delaying Phi scheduling until after register allocation can help |
| 281 // Phi scheduling until after register allocation can help unless | 280 // unless there are no free registers for shuffling registers or stack slots and |
| 282 // there are no free registers for shuffling registers or stack slots | 281 // spilling becomes necessary. |
| 283 // and spilling becomes necessary. | |
| 284 // | 282 // |
| 285 // The advanced Phi lowering starts by finding a topological sort of | 283 // The advanced Phi lowering starts by finding a topological sort of the Phi |
| 286 // the Phi instructions, where "A=B" comes before "B=C" due to the | 284 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on B. |
| 287 // anti-dependence on B. If a topological sort is not possible due to | 285 // Preexisting register assignments are considered in the topological sort. If |
| 288 // a cycle, the cycle is broken by introducing a non-parallel | 286 // a topological sort is not possible due to a cycle, the cycle is broken by |
| 289 // temporary. For example, a cycle arising from a permutation like | 287 // introducing a non-parallel temporary. For example, a cycle arising from a |
| 290 // "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being equal, | 288 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being |
| 291 // prefer to schedule assignments with register-allocated Src operands | 289 // equal, prefer to schedule assignments with register-allocated Src operands |
| 292 // earlier, in case that register becomes free afterwards, and prefer | 290 // earlier, in case that register becomes free afterwards, and prefer to |
| 293 // to schedule assignments with register-allocated Dest variables | 291 // schedule assignments with register-allocated Dest variables later, to keep |
| 294 // later, to keep that register free for longer. | 292 // that register free for longer. |
| 295 // | 293 // |
| 296 // Once the ordering is determined, the Cfg edge is split and the | 294 // Once the ordering is determined, the Cfg edge is split and the assignment |
| 297 // assignment list is lowered by the target lowering layer. The | 295 // list is lowered by the target lowering layer. Since the assignment lowering |
| 298 // specific placement of the new node within the Cfg node list is | 296 // may create new infinite-weight temporaries, a follow-on register allocation |
| 299 // deferred until later, including after empty node contraction. | 297 // pass will be needed. To prepare for this, liveness (including live range |
| 298 // calculation) of the split nodes needs to be calculated, and liveness of the |
| 299 // original node need to be updated to "undo" the effects of the phi |
| 300 // assignments. |
| 301 |
| 302 // The specific placement of the new node within the Cfg node list is deferred |
| 303 // until later, including after empty node contraction. |
| 304 // |
| 305 // After phi assignments are lowered across all blocks, another register |
| 306 // allocation pass is run, focusing only on pre-colored and infinite-weight |
| 307 // variables, similar to Om1 register allocation (except without the need to |
| 308 // specially compute these variables' live ranges, since they have already been |
| 309 // precisely calculated). The register allocator in this mode needs the ability |
| 310 // to forcibly spill and reload registers in case none are naturally available. |
| 300 void CfgNode::advancedPhiLowering() { | 311 void CfgNode::advancedPhiLowering() { |
| 301 if (getPhis().empty()) | 312 if (getPhis().empty()) |
| 302 return; | 313 return; |
| 303 | 314 |
| 304 // Count the number of non-deleted Phi instructions. | 315 // Count the number of non-deleted Phi instructions. |
| 305 struct PhiDesc { | 316 struct PhiDesc { |
| 306 InstPhi *Phi; | 317 InstPhi *Phi; |
| 307 Variable *Dest; | 318 Variable *Dest; |
| 308 Operand *Src; | 319 Operand *Src; |
| 309 bool Processed; | 320 bool Processed; |
| 310 size_t NumPred; // number of entries whose Src is this Dest | 321 size_t NumPred; // number of entries whose Src is this Dest |
| 311 int32_t Weight; // preference for topological order | 322 int32_t Weight; // preference for topological order |
| 312 }; | 323 }; |
| 313 llvm::SmallVector<PhiDesc, 32> Desc(getPhis().size()); | 324 llvm::SmallVector<PhiDesc, 32> Desc(getPhis().size()); |
| 314 | 325 |
| 315 size_t NumPhis = 0; | 326 size_t NumPhis = 0; |
| 316 for (Inst &I : Phis) { | 327 for (Inst &I : Phis) { |
| 317 auto Inst = llvm::dyn_cast<InstPhi>(&I); | 328 auto *Inst = llvm::dyn_cast<InstPhi>(&I); |
| 318 if (!Inst->isDeleted()) { | 329 if (!Inst->isDeleted()) { |
| 330 Variable *Dest = Inst->getDest(); |
| 319 Desc[NumPhis].Phi = Inst; | 331 Desc[NumPhis].Phi = Inst; |
| 320 Desc[NumPhis].Dest = Inst->getDest(); | 332 Desc[NumPhis].Dest = Dest; |
| 321 ++NumPhis; | 333 ++NumPhis; |
| 334 // Undo the effect of the phi instruction on this node's live-in set by |
| 335 // marking the phi dest variable as live on entry. |
| 336 SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex()); |
| 337 assert(!Func->getLiveness()->getLiveIn(this)[VarNum]); |
| 338 Func->getLiveness()->getLiveIn(this)[VarNum] = true; |
| 339 Inst->setDeleted(); |
| 322 } | 340 } |
| 323 } | 341 } |
| 324 if (NumPhis == 0) | 342 if (NumPhis == 0) |
| 325 return; | 343 return; |
| 326 | 344 |
| 327 SizeT InEdgeIndex = 0; | 345 SizeT InEdgeIndex = 0; |
| 328 for (CfgNode *Pred : InEdges) { | 346 for (CfgNode *Pred : InEdges) { |
| 329 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); | 347 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); |
| 330 AssignList Assignments; | |
| 331 SizeT Remaining = NumPhis; | 348 SizeT Remaining = NumPhis; |
| 332 | 349 |
| 333 // First pass computes Src and initializes NumPred. | 350 // First pass computes Src and initializes NumPred. |
| 334 for (size_t I = 0; I < NumPhis; ++I) { | 351 for (size_t I = 0; I < NumPhis; ++I) { |
| 335 Variable *Dest = Desc[I].Dest; | 352 Variable *Dest = Desc[I].Dest; |
| 336 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); | 353 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); |
| 337 Desc[I].Src = Src; | 354 Desc[I].Src = Src; |
| 338 Desc[I].Processed = false; | 355 Desc[I].Processed = false; |
| 339 Desc[I].NumPred = 0; | 356 Desc[I].NumPred = 0; |
| 340 // Cherry-pick any trivial assignments, so that they don't | 357 // Cherry-pick any trivial assignments, so that they don't |
| 341 // contribute to the running complexity of the topological sort. | 358 // contribute to the running complexity of the topological sort. |
| 342 if (sameVarOrReg(Dest, Src)) { | 359 if (sameVarOrReg(Dest, Src)) { |
| 343 Desc[I].Processed = true; | 360 Desc[I].Processed = true; |
| 344 --Remaining; | 361 --Remaining; |
| 345 if (Dest != Src) | 362 if (Dest != Src) |
| 346 // If Dest and Src are syntactically the same, don't bother | 363 // If Dest and Src are syntactically the same, don't bother |
| 347 // adding the assignment, because in all respects it would | 364 // adding the assignment, because in all respects it would |
| 348 // be redundant, and if Dest/Src are on the stack, the | 365 // be redundant, and if Dest/Src are on the stack, the |
| 349 // target lowering may naively decide to lower it using a | 366 // target lowering may naively decide to lower it using a |
| 350 // temporary register. | 367 // temporary register. |
| 351 Assignments.push_back(InstAssign::create(Func, Dest, Src)); | 368 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
| 352 } | 369 } |
| 353 } | 370 } |
| 354 // Second pass computes NumPred by comparing every pair of Phi | 371 // Second pass computes NumPred by comparing every pair of Phi |
| 355 // instructions. | 372 // instructions. |
| 356 for (size_t I = 0; I < NumPhis; ++I) { | 373 for (size_t I = 0; I < NumPhis; ++I) { |
| 357 if (Desc[I].Processed) | 374 if (Desc[I].Processed) |
| 358 continue; | 375 continue; |
| 359 const Variable *Dest = Desc[I].Dest; | 376 const Variable *Dest = Desc[I].Dest; |
| 360 for (size_t J = 0; J < NumPhis; ++J) { | 377 for (size_t J = 0; J < NumPhis; ++J) { |
| 361 if (Desc[J].Processed) | 378 if (Desc[J].Processed) |
| 362 continue; | 379 continue; |
| 363 if (I != J) { | 380 if (I != J) { |
| 364 // There shouldn't be two Phis with the same Dest variable | 381 // There shouldn't be two Phis with the same Dest variable |
| 365 // or register. | 382 // or register. |
| 366 assert(!sameVarOrReg(Dest, Desc[J].Dest)); | 383 assert(!sameVarOrReg(Dest, Desc[J].Dest)); |
| 367 } | 384 } |
| 368 const Operand *Src = Desc[J].Src; | 385 const Operand *Src = Desc[J].Src; |
| 369 if (sameVarOrReg(Dest, Src)) | 386 if (sameVarOrReg(Dest, Src)) |
| 370 ++Desc[I].NumPred; | 387 ++Desc[I].NumPred; |
| 371 } | 388 } |
| 372 } | 389 } |
| 373 | 390 |
| 374 // Another pass to compute initial Weight values. | 391 // Another pass to compute initial Weight values. |
| 375 | 392 |
| 376 // Always pick NumPred=0 over NumPred>0. | 393 // Always pick NumPred=0 over NumPred>0. |
| 377 const int32_t WeightNoPreds = 4; | 394 constexpr int32_t WeightNoPreds = 4; |
| 378 // Prefer Src as a register because the register might free up. | 395 // Prefer Src as a register because the register might free up. |
| 379 const int32_t WeightSrcIsReg = 2; | 396 constexpr int32_t WeightSrcIsReg = 2; |
| 380 // Prefer Dest not as a register because the register stays free | 397 // Prefer Dest not as a register because the register stays free |
| 381 // longer. | 398 // longer. |
| 382 const int32_t WeightDestNotReg = 1; | 399 constexpr int32_t WeightDestNotReg = 1; |
| 383 | 400 |
| 384 for (size_t I = 0; I < NumPhis; ++I) { | 401 for (size_t I = 0; I < NumPhis; ++I) { |
| 385 if (Desc[I].Processed) | 402 if (Desc[I].Processed) |
| 386 continue; | 403 continue; |
| 387 int32_t Weight = 0; | 404 int32_t Weight = 0; |
| 388 if (Desc[I].NumPred == 0) | 405 if (Desc[I].NumPred == 0) |
| 389 Weight += WeightNoPreds; | 406 Weight += WeightNoPreds; |
| 390 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) | 407 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) |
| 391 if (Var->hasReg()) | 408 if (Var->hasReg()) |
| 392 Weight += WeightSrcIsReg; | 409 Weight += WeightSrcIsReg; |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 427 // be rewritten as "X=tmp". | 444 // be rewritten as "X=tmp". |
| 428 for (size_t J = 0; !Found && J < NumPhis; ++J) { | 445 for (size_t J = 0; !Found && J < NumPhis; ++J) { |
| 429 if (Desc[J].Processed) | 446 if (Desc[J].Processed) |
| 430 continue; | 447 continue; |
| 431 Operand *OtherSrc = Desc[J].Src; | 448 Operand *OtherSrc = Desc[J].Src; |
| 432 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { | 449 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { |
| 433 SizeT VarNum = Func->getNumVariables(); | 450 SizeT VarNum = Func->getNumVariables(); |
| 434 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); | 451 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); |
| 435 if (BuildDefs::dump()) | 452 if (BuildDefs::dump()) |
| 436 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); | 453 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); |
| 437 Assignments.push_back(InstAssign::create(Func, Tmp, OtherSrc)); | 454 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); |
| 438 Desc[J].Src = Tmp; | 455 Desc[J].Src = Tmp; |
| 439 Found = true; | 456 Found = true; |
| 440 } | 457 } |
| 441 } | 458 } |
| 442 assert(Found); | 459 assert(Found); |
| 443 } | 460 } |
| 444 // Now that a cycle (if any) has been broken, create the actual | 461 // Now that a cycle (if any) has been broken, create the actual |
| 445 // assignment. | 462 // assignment. |
| 446 Assignments.push_back(InstAssign::create(Func, Dest, Src)); | 463 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
| 447 // Update NumPred for all Phi assignments using this Phi's Src | 464 // Update NumPred for all Phi assignments using this Phi's Src |
| 448 // as their Dest variable. Also update Weight if NumPred | 465 // as their Dest variable. Also update Weight if NumPred |
| 449 // dropped from 1 to 0. | 466 // dropped from 1 to 0. |
| 450 if (auto Var = llvm::dyn_cast<Variable>(Src)) { | 467 if (auto Var = llvm::dyn_cast<Variable>(Src)) { |
| 451 for (size_t I = 0; I < NumPhis; ++I) { | 468 for (size_t I = 0; I < NumPhis; ++I) { |
| 452 if (Desc[I].Processed) | 469 if (Desc[I].Processed) |
| 453 continue; | 470 continue; |
| 454 if (sameVarOrReg(Var, Desc[I].Dest)) { | 471 if (sameVarOrReg(Var, Desc[I].Dest)) { |
| 455 if (--Desc[I].NumPred == 0) | 472 if (--Desc[I].NumPred == 0) |
| 456 Desc[I].Weight += WeightNoPreds; | 473 Desc[I].Weight += WeightNoPreds; |
| 457 } | 474 } |
| 458 } | 475 } |
| 459 } | 476 } |
| 460 Desc[BestIndex].Processed = true; | 477 Desc[BestIndex].Processed = true; |
| 461 } | 478 } |
| 479 Split->appendInst(InstBr::create(Func, this)); |
| 462 | 480 |
| 463 Func->getTarget()->lowerPhiAssignments(Split, Assignments); | 481 Split->genCode(); |
| 464 | |
| 465 // Renumber the instructions to be monotonically increasing so | |
| 466 // that addNode() doesn't assert when multi-definitions are added | |
| 467 // out of order. | |
| 468 Split->renumberInstructions(); | |
| 469 Func->getVMetadata()->addNode(Split); | 482 Func->getVMetadata()->addNode(Split); |
| 470 } | 483 } |
| 471 | |
| 472 for (Inst &I : Phis) | |
| 473 I.setDeleted(); | |
| 474 } | 484 } |
| 475 | 485 |
| 476 // Does address mode optimization. Pass each instruction to the | 486 // Does address mode optimization. Pass each instruction to the |
| 477 // TargetLowering object. If it returns a new instruction | 487 // TargetLowering object. If it returns a new instruction |
| 478 // (representing the optimized address mode), then insert the new | 488 // (representing the optimized address mode), then insert the new |
| 479 // instruction and delete the old. | 489 // instruction and delete the old. |
| 480 void CfgNode::doAddressOpt() { | 490 void CfgNode::doAddressOpt() { |
| 481 TargetLowering *Target = Func->getTarget(); | 491 TargetLowering *Target = Func->getTarget(); |
| 482 LoweringContext &Context = Target->getContext(); | 492 LoweringContext &Context = Target->getContext(); |
| 483 Context.init(this); | 493 Context.init(this); |
| (...skipping 405 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 889 Liveness *Liveness = Func->getLiveness(); | 899 Liveness *Liveness = Func->getLiveness(); |
| 890 bool DecorateAsm = | 900 bool DecorateAsm = |
| 891 Liveness && Func->getContext()->getFlags().getDecorateAsm(); | 901 Liveness && Func->getContext()->getFlags().getDecorateAsm(); |
| 892 Str << getAsmName() << ":\n"; | 902 Str << getAsmName() << ":\n"; |
| 893 // LiveRegCount keeps track of the number of currently live | 903 // LiveRegCount keeps track of the number of currently live |
| 894 // variables that each register is assigned to. Normally that would | 904 // variables that each register is assigned to. Normally that would |
| 895 // be only 0 or 1, but the register allocator's AllowOverlap | 905 // be only 0 or 1, but the register allocator's AllowOverlap |
| 896 // inference allows it to be greater than 1 for short periods. | 906 // inference allows it to be greater than 1 for short periods. |
| 897 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); | 907 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); |
| 898 if (DecorateAsm) { | 908 if (DecorateAsm) { |
| 899 const bool IsLiveIn = true; | 909 constexpr bool IsLiveIn = true; |
| 900 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); | 910 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); |
| 901 } | 911 } |
| 902 | 912 |
| 903 for (const Inst &I : Phis) { | 913 for (const Inst &I : Phis) { |
| 904 if (I.isDeleted()) | 914 if (I.isDeleted()) |
| 905 continue; | 915 continue; |
| 906 // Emitting a Phi instruction should cause an error. | 916 // Emitting a Phi instruction should cause an error. |
| 907 I.emit(Func); | 917 I.emit(Func); |
| 908 } | 918 } |
| 909 for (const Inst &I : Insts) { | 919 for (const Inst &I : Insts) { |
| (...skipping 14 matching lines...) Expand all Loading... |
| 924 ++LiveRegCount[Dest->getRegNum()]; | 934 ++LiveRegCount[Dest->getRegNum()]; |
| 925 continue; | 935 continue; |
| 926 } | 936 } |
| 927 I.emit(Func); | 937 I.emit(Func); |
| 928 if (DecorateAsm) | 938 if (DecorateAsm) |
| 929 emitLiveRangesEnded(Str, Func, &I, LiveRegCount); | 939 emitLiveRangesEnded(Str, Func, &I, LiveRegCount); |
| 930 Str << "\n"; | 940 Str << "\n"; |
| 931 updateStats(Func, &I); | 941 updateStats(Func, &I); |
| 932 } | 942 } |
| 933 if (DecorateAsm) { | 943 if (DecorateAsm) { |
| 934 const bool IsLiveIn = false; | 944 constexpr bool IsLiveIn = false; |
| 935 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); | 945 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); |
| 936 } | 946 } |
| 937 } | 947 } |
| 938 | 948 |
| 939 // Helper class for emitIAS(). | 949 // Helper class for emitIAS(). |
| 940 namespace { | 950 namespace { |
| 941 class BundleEmitHelper { | 951 class BundleEmitHelper { |
| 942 BundleEmitHelper() = delete; | 952 BundleEmitHelper() = delete; |
| 943 BundleEmitHelper(const BundleEmitHelper &) = delete; | 953 BundleEmitHelper(const BundleEmitHelper &) = delete; |
| 944 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete; | 954 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete; |
| (...skipping 321 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1266 InstIntrinsicCall *Inst = InstIntrinsicCall::create( | 1276 InstIntrinsicCall *Inst = InstIntrinsicCall::create( |
| 1267 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); | 1277 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); |
| 1268 Inst->addArg(AtomicRMWOp); | 1278 Inst->addArg(AtomicRMWOp); |
| 1269 Inst->addArg(Counter); | 1279 Inst->addArg(Counter); |
| 1270 Inst->addArg(One); | 1280 Inst->addArg(One); |
| 1271 Inst->addArg(OrderAcquireRelease); | 1281 Inst->addArg(OrderAcquireRelease); |
| 1272 Insts.push_front(Inst); | 1282 Insts.push_front(Inst); |
| 1273 } | 1283 } |
| 1274 | 1284 |
| 1275 } // end of namespace Ice | 1285 } // end of namespace Ice |
| OLD | NEW |