| OLD | NEW |
| 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// | 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 291 for (Inst &I : Pred->getInsts()) | 291 for (Inst &I : Pred->getInsts()) |
| 292 if (!I.isDeleted() && I.repointEdges(this, NewNode)) | 292 if (!I.isDeleted() && I.repointEdges(this, NewNode)) |
| 293 Found = true; | 293 Found = true; |
| 294 assert(Found); | 294 assert(Found); |
| 295 (void)Found; | 295 (void)Found; |
| 296 return NewNode; | 296 return NewNode; |
| 297 } | 297 } |
| 298 | 298 |
| 299 namespace { | 299 namespace { |
| 300 | 300 |
| 301 // Helper function used by advancedPhiLowering(). | 301 // Helpers for advancedPhiLowering(). |
| 302 |
| 303 class PhiDesc { |
| 304 PhiDesc() = delete; |
| 305 PhiDesc(const PhiDesc &) = delete; |
| 306 PhiDesc &operator=(const PhiDesc &) = delete; |
| 307 public: |
| 308 PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {} |
| 309 PhiDesc(PhiDesc &&) = default; |
| 310 InstPhi *Phi = nullptr; |
| 311 Variable *Dest = nullptr; |
| 312 Operand *Src = nullptr; |
| 313 bool Processed = false; |
| 314 size_t NumPred = 0; // number of entries whose Src is this Dest |
| 315 int32_t Weight = 0; // preference for topological order |
| 316 }; |
| 317 using PhiDescList = llvm::SmallVector<PhiDesc, 32>; |
| 318 |
| 319 // Always pick NumPred=0 over NumPred>0. |
| 320 constexpr int32_t WeightNoPreds = 4; |
| 321 // Prefer Src as a register because the register might free up. |
| 322 constexpr int32_t WeightSrcIsReg = 2; |
| 323 // Prefer Dest not as a register because the register stays free longer. |
| 324 constexpr int32_t WeightDestNotReg = 1; |
| 325 |
| 302 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1, | 326 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1, |
| 303 const Operand *Opnd) { | 327 const Operand *Opnd) { |
| 304 if (Var1 == Opnd) | 328 if (Var1 == Opnd) |
| 305 return true; | 329 return true; |
| 306 const auto *Var2 = llvm::dyn_cast<Variable>(Opnd); | 330 const auto *Var2 = llvm::dyn_cast<Variable>(Opnd); |
| 307 if (Var2 == nullptr) | 331 if (Var2 == nullptr) |
| 308 return false; | 332 return false; |
| 309 | 333 |
| 310 // If either operand lacks a register, they cannot be the same. | 334 // If either operand lacks a register, they cannot be the same. |
| 311 if (!Var1->hasReg()) | 335 if (!Var1->hasReg()) |
| 312 return false; | 336 return false; |
| 313 if (!Var2->hasReg()) | 337 if (!Var2->hasReg()) |
| 314 return false; | 338 return false; |
| 315 | 339 |
| 316 int32_t RegNum1 = Var1->getRegNum(); | 340 int32_t RegNum1 = Var1->getRegNum(); |
| 317 int32_t RegNum2 = Var2->getRegNum(); | 341 int32_t RegNum2 = Var2->getRegNum(); |
| 318 // Quick common-case check. | 342 // Quick common-case check. |
| 319 if (RegNum1 == RegNum2) | 343 if (RegNum1 == RegNum2) |
| 320 return true; | 344 return true; |
| 321 | 345 |
| 322 assert(Target->getAliasesForRegister(RegNum1)[RegNum2] == | 346 assert(Target->getAliasesForRegister(RegNum1)[RegNum2] == |
| 323 Target->getAliasesForRegister(RegNum2)[RegNum1]); | 347 Target->getAliasesForRegister(RegNum2)[RegNum1]); |
| 324 return Target->getAliasesForRegister(RegNum1)[RegNum2]; | 348 return Target->getAliasesForRegister(RegNum1)[RegNum2]; |
| 325 } | 349 } |
| 326 | 350 |
| 351 // Update NumPred for all Phi assignments using Var as their Dest variable. |
| 352 // Also update Weight if NumPred dropped from 1 to 0. |
| 353 void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) { |
| 354 for (PhiDesc &Item : Desc) { |
| 355 if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) { |
| 356 if (--Item.NumPred == 0) { |
| 357 Item.Weight += WeightNoPreds; |
| 358 } |
| 359 } |
| 360 } |
| 361 } |
| 362 |
| 327 } // end of anonymous namespace | 363 } // end of anonymous namespace |
| 328 | 364 |
| 329 // This the "advanced" version of Phi lowering for a basic block, in contrast | 365 // This the "advanced" version of Phi lowering for a basic block, in contrast |
| 330 // to the simple version that lowers through assignments involving temporaries. | 366 // to the simple version that lowers through assignments involving temporaries. |
| 331 // | 367 // |
| 332 // All Phi instructions in a basic block are conceptually executed in parallel. | 368 // All Phi instructions in a basic block are conceptually executed in parallel. |
| 333 // However, if we lower Phis early and commit to a sequential ordering, we may | 369 // However, if we lower Phis early and commit to a sequential ordering, we may |
| 334 // end up creating unnecessary interferences which lead to worse register | 370 // end up creating unnecessary interferences which lead to worse register |
| 335 // allocation. Delaying Phi scheduling until after register allocation can help | 371 // allocation. Delaying Phi scheduling until after register allocation can help |
| 336 // unless there are no free registers for shuffling registers or stack slots | 372 // unless there are no free registers for shuffling registers or stack slots |
| (...skipping 24 matching lines...) Expand all Loading... |
| 361 // After phi assignments are lowered across all blocks, another register | 397 // After phi assignments are lowered across all blocks, another register |
| 362 // allocation pass is run, focusing only on pre-colored and infinite-weight | 398 // allocation pass is run, focusing only on pre-colored and infinite-weight |
| 363 // variables, similar to Om1 register allocation (except without the need to | 399 // variables, similar to Om1 register allocation (except without the need to |
| 364 // specially compute these variables' live ranges, since they have already been | 400 // specially compute these variables' live ranges, since they have already been |
| 365 // precisely calculated). The register allocator in this mode needs the ability | 401 // precisely calculated). The register allocator in this mode needs the ability |
| 366 // to forcibly spill and reload registers in case none are naturally available. | 402 // to forcibly spill and reload registers in case none are naturally available. |
| 367 void CfgNode::advancedPhiLowering() { | 403 void CfgNode::advancedPhiLowering() { |
| 368 if (getPhis().empty()) | 404 if (getPhis().empty()) |
| 369 return; | 405 return; |
| 370 | 406 |
| 371 // Count the number of non-deleted Phi instructions. | 407 PhiDescList Desc; |
| 372 struct PhiDesc { | |
| 373 InstPhi *Phi; | |
| 374 Variable *Dest; | |
| 375 Operand *Src; | |
| 376 bool Processed; | |
| 377 size_t NumPred; // number of entries whose Src is this Dest | |
| 378 int32_t Weight; // preference for topological order | |
| 379 }; | |
| 380 llvm::SmallVector<PhiDesc, 32> Desc(getPhis().size()); | |
| 381 | 408 |
| 382 size_t NumPhis = 0; | |
| 383 for (Inst &I : Phis) { | 409 for (Inst &I : Phis) { |
| 384 auto *Inst = llvm::dyn_cast<InstPhi>(&I); | 410 auto *Phi = llvm::dyn_cast<InstPhi>(&I); |
| 385 if (!Inst->isDeleted()) { | 411 if (!Phi->isDeleted()) { |
| 386 Variable *Dest = Inst->getDest(); | 412 Variable *Dest = Phi->getDest(); |
| 387 Desc[NumPhis].Phi = Inst; | 413 Desc.emplace_back(Phi, Dest); |
| 388 Desc[NumPhis].Dest = Dest; | |
| 389 ++NumPhis; | |
| 390 // Undo the effect of the phi instruction on this node's live-in set by | 414 // Undo the effect of the phi instruction on this node's live-in set by |
| 391 // marking the phi dest variable as live on entry. | 415 // marking the phi dest variable as live on entry. |
| 392 SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex()); | 416 SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex()); |
| 393 assert(!Func->getLiveness()->getLiveIn(this)[VarNum]); | 417 assert(!Func->getLiveness()->getLiveIn(this)[VarNum]); |
| 394 Func->getLiveness()->getLiveIn(this)[VarNum] = true; | 418 Func->getLiveness()->getLiveIn(this)[VarNum] = true; |
| 395 Inst->setDeleted(); | 419 Phi->setDeleted(); |
| 396 } | 420 } |
| 397 } | 421 } |
| 398 if (NumPhis == 0) | 422 if (Desc.empty()) |
| 399 return; | 423 return; |
| 400 | 424 |
| 401 TargetLowering *Target = Func->getTarget(); | 425 TargetLowering *Target = Func->getTarget(); |
| 402 SizeT InEdgeIndex = 0; | 426 SizeT InEdgeIndex = 0; |
| 403 for (CfgNode *Pred : InEdges) { | 427 for (CfgNode *Pred : InEdges) { |
| 404 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); | 428 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); |
| 405 SizeT Remaining = NumPhis; | 429 SizeT Remaining = Desc.size(); |
| 406 | 430 |
| 407 // First pass computes Src and initializes NumPred. | 431 // First pass computes Src and initializes NumPred. |
| 408 for (size_t I = 0; I < NumPhis; ++I) { | 432 for (PhiDesc &Item : Desc) { |
| 409 Variable *Dest = Desc[I].Dest; | 433 Variable *Dest = Item.Dest; |
| 410 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); | 434 Operand *Src = Item.Phi->getOperandForTarget(Pred); |
| 411 Desc[I].Src = Src; | 435 Item.Src = Src; |
| 412 Desc[I].Processed = false; | 436 Item.Processed = false; |
| 413 Desc[I].NumPred = 0; | 437 Item.NumPred = 0; |
| 414 // Cherry-pick any trivial assignments, so that they don't contribute to | 438 // Cherry-pick any trivial assignments, so that they don't contribute to |
| 415 // the running complexity of the topological sort. | 439 // the running complexity of the topological sort. |
| 416 if (sameVarOrReg(Target, Dest, Src)) { | 440 if (sameVarOrReg(Target, Dest, Src)) { |
| 417 Desc[I].Processed = true; | 441 Item.Processed = true; |
| 418 --Remaining; | 442 --Remaining; |
| 419 if (Dest != Src) | 443 if (Dest != Src) |
| 420 // If Dest and Src are syntactically the same, don't bother adding | 444 // If Dest and Src are syntactically the same, don't bother adding |
| 421 // the assignment, because in all respects it would be redundant, and | 445 // the assignment, because in all respects it would be redundant, and |
| 422 // if Dest/Src are on the stack, the target lowering may naively | 446 // if Dest/Src are on the stack, the target lowering may naively |
| 423 // decide to lower it using a temporary register. | 447 // decide to lower it using a temporary register. |
| 424 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 448 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
| 425 } | 449 } |
| 426 } | 450 } |
| 427 // Second pass computes NumPred by comparing every pair of Phi | 451 // Second pass computes NumPred by comparing every pair of Phi instructions. |
| 428 // instructions. | 452 for (PhiDesc &Item : Desc) { |
| 429 for (size_t I = 0; I < NumPhis; ++I) { | 453 if (Item.Processed) |
| 430 if (Desc[I].Processed) | |
| 431 continue; | 454 continue; |
| 432 const Variable *Dest = Desc[I].Dest; | 455 const Variable *Dest = Item.Dest; |
| 433 for (size_t J = 0; J < NumPhis; ++J) { | 456 for (PhiDesc &Item2 : Desc) { |
| 434 if (Desc[J].Processed) | 457 if (Item2.Processed) |
| 435 continue; | 458 continue; |
| 436 if (I != J) { | 459 // There shouldn't be two different Phis with the same Dest variable or |
| 437 // There shouldn't be two Phis with the same Dest variable or | |
| 438 // register. | 460 // register. |
| 439 assert(!sameVarOrReg(Target, Dest, Desc[J].Dest)); | 461 assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest)); |
| 440 } | 462 if (sameVarOrReg(Target, Dest, Item2.Src)) |
| 441 const Operand *Src = Desc[J].Src; | 463 ++Item.NumPred; |
| 442 if (sameVarOrReg(Target, Dest, Src)) | |
| 443 ++Desc[I].NumPred; | |
| 444 } | 464 } |
| 445 } | 465 } |
| 446 | 466 |
| 447 // Another pass to compute initial Weight values. | 467 // Another pass to compute initial Weight values. |
| 448 | 468 for (PhiDesc &Item : Desc) { |
| 449 // Always pick NumPred=0 over NumPred>0. | 469 if (Item.Processed) |
| 450 constexpr int32_t WeightNoPreds = 4; | |
| 451 // Prefer Src as a register because the register might free up. | |
| 452 constexpr int32_t WeightSrcIsReg = 2; | |
| 453 // Prefer Dest not as a register because the register stays free longer. | |
| 454 constexpr int32_t WeightDestNotReg = 1; | |
| 455 | |
| 456 for (size_t I = 0; I < NumPhis; ++I) { | |
| 457 if (Desc[I].Processed) | |
| 458 continue; | 470 continue; |
| 459 int32_t Weight = 0; | 471 int32_t Weight = 0; |
| 460 if (Desc[I].NumPred == 0) | 472 if (Item.NumPred == 0) |
| 461 Weight += WeightNoPreds; | 473 Weight += WeightNoPreds; |
| 462 if (auto *Var = llvm::dyn_cast<Variable>(Desc[I].Src)) | 474 if (auto *Var = llvm::dyn_cast<Variable>(Item.Src)) |
| 463 if (Var->hasReg()) | 475 if (Var->hasReg()) |
| 464 Weight += WeightSrcIsReg; | 476 Weight += WeightSrcIsReg; |
| 465 if (!Desc[I].Dest->hasReg()) | 477 if (!Item.Dest->hasReg()) |
| 466 Weight += WeightDestNotReg; | 478 Weight += WeightDestNotReg; |
| 467 Desc[I].Weight = Weight; | 479 Item.Weight = Weight; |
| 468 } | 480 } |
| 469 | 481 |
| 470 // Repeatedly choose and process the best candidate in the topological | 482 // Repeatedly choose and process the best candidate in the topological sort, |
| 471 // sort, until no candidates remain. This implementation is O(N^2) where N | 483 // until no candidates remain. This implementation is O(N^2) where N is the |
| 472 // is the number of Phi instructions, but with a small constant factor | 484 // number of Phi instructions, but with a small constant factor compared to |
| 473 // compared to a likely implementation of O(N) topological sort. | 485 // a likely implementation of O(N) topological sort. |
| 474 for (; Remaining; --Remaining) { | 486 for (; Remaining; --Remaining) { |
| 475 size_t BestIndex = 0; | |
| 476 int32_t BestWeight = -1; | 487 int32_t BestWeight = -1; |
| 488 PhiDesc *BestItem = nullptr; |
| 477 // Find the best candidate. | 489 // Find the best candidate. |
| 478 for (size_t I = 0; I < NumPhis; ++I) { | 490 for (PhiDesc &Item : Desc) { |
| 479 if (Desc[I].Processed) | 491 if (Item.Processed) |
| 480 continue; | 492 continue; |
| 481 int32_t Weight = 0; | 493 int32_t Weight = 0; |
| 482 Weight = Desc[I].Weight; | 494 Weight = Item.Weight; |
| 483 if (Weight > BestWeight) { | 495 if (Weight > BestWeight) { |
| 484 BestIndex = I; | 496 BestItem = &Item; |
| 485 BestWeight = Weight; | 497 BestWeight = Weight; |
| 486 } | 498 } |
| 487 } | 499 } |
| 488 assert(BestWeight >= 0); | 500 assert(BestWeight >= 0); |
| 489 assert(Desc[BestIndex].NumPred <= 1); | 501 assert(BestItem->NumPred <= 1); |
| 490 Variable *Dest = Desc[BestIndex].Dest; | 502 Variable *Dest = BestItem->Dest; |
| 491 Operand *Src = Desc[BestIndex].Src; | 503 Operand *Src = BestItem->Src; |
| 492 assert(!sameVarOrReg(Target, Dest, Src)); | 504 assert(!sameVarOrReg(Target, Dest, Src)); |
| 493 // Break a cycle by introducing a temporary. | 505 // Break a cycle by introducing a temporary. |
| 494 if (Desc[BestIndex].NumPred) { | 506 if (BestItem->NumPred) { |
| 495 bool Found = false; | 507 bool Found = false; |
| 496 // If the target instruction "A=B" is part of a cycle, find the "X=A" | 508 // If the target instruction "A=B" is part of a cycle, find the "X=A" |
| 497 // assignment in the cycle because it will have to be rewritten as | 509 // assignment in the cycle because it will have to be rewritten as |
| 498 // "X=tmp". | 510 // "X=tmp". |
| 499 for (size_t J = 0; !Found && J < NumPhis; ++J) { | 511 for (PhiDesc &Item : Desc) { |
| 500 if (Desc[J].Processed) | 512 if (Item.Processed) |
| 501 continue; | 513 continue; |
| 502 Operand *OtherSrc = Desc[J].Src; | 514 Operand *OtherSrc = Item.Src; |
| 503 if (Desc[J].NumPred && sameVarOrReg(Target, Dest, OtherSrc)) { | 515 if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) { |
| 504 SizeT VarNum = Func->getNumVariables(); | 516 SizeT VarNum = Func->getNumVariables(); |
| 505 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); | 517 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); |
| 506 if (BuildDefs::dump()) | 518 if (BuildDefs::dump()) |
| 507 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); | 519 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); |
| 508 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); | 520 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); |
| 509 Desc[J].Src = Tmp; | 521 Item.Src = Tmp; |
| 522 updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc)); |
| 510 Found = true; | 523 Found = true; |
| 524 break; |
| 511 } | 525 } |
| 512 } | 526 } |
| 513 assert(Found); | 527 assert(Found); |
| 514 } | 528 } |
| 515 // Now that a cycle (if any) has been broken, create the actual | 529 // Now that a cycle (if any) has been broken, create the actual |
| 516 // assignment. | 530 // assignment. |
| 517 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 531 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
| 518 // Update NumPred for all Phi assignments using this Phi's Src as their | 532 if (auto *Var = llvm::dyn_cast<Variable>(Src)) |
| 519 // Dest variable. Also update Weight if NumPred dropped from 1 to 0. | 533 updatePreds(Desc, Target, Var); |
| 520 if (auto *Var = llvm::dyn_cast<Variable>(Src)) { | 534 BestItem->Processed = true; |
| 521 for (size_t I = 0; I < NumPhis; ++I) { | |
| 522 if (Desc[I].Processed) | |
| 523 continue; | |
| 524 if (sameVarOrReg(Target, Var, Desc[I].Dest)) { | |
| 525 if (--Desc[I].NumPred == 0) | |
| 526 Desc[I].Weight += WeightNoPreds; | |
| 527 } | |
| 528 } | |
| 529 } | |
| 530 Desc[BestIndex].Processed = true; | |
| 531 } | 535 } |
| 532 Split->appendInst(InstBr::create(Func, this)); | 536 Split->appendInst(InstBr::create(Func, this)); |
| 533 | 537 |
| 534 Split->genCode(); | 538 Split->genCode(); |
| 535 Func->getVMetadata()->addNode(Split); | 539 Func->getVMetadata()->addNode(Split); |
| 540 // Validate to be safe. All items should be marked as processed, and have |
| 541 // no predecessors. |
| 542 if (BuildDefs::asserts()) { |
| 543 for (PhiDesc &Item : Desc) { |
| 544 (void)Item; |
| 545 assert(Item.Processed); |
| 546 assert(Item.NumPred == 0); |
| 547 } |
| 548 } |
| 536 } | 549 } |
| 537 } | 550 } |
| 538 | 551 |
| 539 // Does address mode optimization. Pass each instruction to the TargetLowering | 552 // Does address mode optimization. Pass each instruction to the TargetLowering |
| 540 // object. If it returns a new instruction (representing the optimized address | 553 // object. If it returns a new instruction (representing the optimized address |
| 541 // mode), then insert the new instruction and delete the old. | 554 // mode), then insert the new instruction and delete the old. |
| 542 void CfgNode::doAddressOpt() { | 555 void CfgNode::doAddressOpt() { |
| 543 TargetLowering *Target = Func->getTarget(); | 556 TargetLowering *Target = Func->getTarget(); |
| 544 LoweringContext &Context = Target->getContext(); | 557 LoweringContext &Context = Target->getContext(); |
| 545 Context.init(this); | 558 Context.init(this); |
| (...skipping 846 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1392 InstIntrinsicCall *Inst = InstIntrinsicCall::create( | 1405 InstIntrinsicCall *Inst = InstIntrinsicCall::create( |
| 1393 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); | 1406 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); |
| 1394 Inst->addArg(AtomicRMWOp); | 1407 Inst->addArg(AtomicRMWOp); |
| 1395 Inst->addArg(Counter); | 1408 Inst->addArg(Counter); |
| 1396 Inst->addArg(One); | 1409 Inst->addArg(One); |
| 1397 Inst->addArg(OrderAcquireRelease); | 1410 Inst->addArg(OrderAcquireRelease); |
| 1398 Insts.push_front(Inst); | 1411 Insts.push_front(Inst); |
| 1399 } | 1412 } |
| 1400 | 1413 |
| 1401 } // end of namespace Ice | 1414 } // end of namespace Ice |
| OLD | NEW |