OLD | NEW |
1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// | 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
291 for (Inst &I : Pred->getInsts()) | 291 for (Inst &I : Pred->getInsts()) |
292 if (!I.isDeleted() && I.repointEdges(this, NewNode)) | 292 if (!I.isDeleted() && I.repointEdges(this, NewNode)) |
293 Found = true; | 293 Found = true; |
294 assert(Found); | 294 assert(Found); |
295 (void)Found; | 295 (void)Found; |
296 return NewNode; | 296 return NewNode; |
297 } | 297 } |
298 | 298 |
299 namespace { | 299 namespace { |
300 | 300 |
301 // Helper function used by advancedPhiLowering(). | 301 // Helpers for advancedPhiLowering(). |
| 302 |
| 303 class PhiDesc { |
| 304 PhiDesc() = delete; |
| 305 PhiDesc(const PhiDesc &) = delete; |
| 306 PhiDesc &operator=(const PhiDesc &) = delete; |
| 307 public: |
| 308 PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {} |
| 309 PhiDesc(PhiDesc &&) = default; |
| 310 InstPhi *Phi = nullptr; |
| 311 Variable *Dest = nullptr; |
| 312 Operand *Src = nullptr; |
| 313 bool Processed = false; |
| 314 size_t NumPred = 0; // number of entries whose Src is this Dest |
| 315 int32_t Weight = 0; // preference for topological order |
| 316 }; |
| 317 using PhiDescList = llvm::SmallVector<PhiDesc, 32>; |
| 318 |
| 319 // Always pick NumPred=0 over NumPred>0. |
| 320 constexpr int32_t WeightNoPreds = 4; |
| 321 // Prefer Src as a register because the register might free up. |
| 322 constexpr int32_t WeightSrcIsReg = 2; |
| 323 // Prefer Dest not as a register because the register stays free longer. |
| 324 constexpr int32_t WeightDestNotReg = 1; |
| 325 |
302 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1, | 326 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1, |
303 const Operand *Opnd) { | 327 const Operand *Opnd) { |
304 if (Var1 == Opnd) | 328 if (Var1 == Opnd) |
305 return true; | 329 return true; |
306 const auto *Var2 = llvm::dyn_cast<Variable>(Opnd); | 330 const auto *Var2 = llvm::dyn_cast<Variable>(Opnd); |
307 if (Var2 == nullptr) | 331 if (Var2 == nullptr) |
308 return false; | 332 return false; |
309 | 333 |
310 // If either operand lacks a register, they cannot be the same. | 334 // If either operand lacks a register, they cannot be the same. |
311 if (!Var1->hasReg()) | 335 if (!Var1->hasReg()) |
312 return false; | 336 return false; |
313 if (!Var2->hasReg()) | 337 if (!Var2->hasReg()) |
314 return false; | 338 return false; |
315 | 339 |
316 int32_t RegNum1 = Var1->getRegNum(); | 340 int32_t RegNum1 = Var1->getRegNum(); |
317 int32_t RegNum2 = Var2->getRegNum(); | 341 int32_t RegNum2 = Var2->getRegNum(); |
318 // Quick common-case check. | 342 // Quick common-case check. |
319 if (RegNum1 == RegNum2) | 343 if (RegNum1 == RegNum2) |
320 return true; | 344 return true; |
321 | 345 |
322 assert(Target->getAliasesForRegister(RegNum1)[RegNum2] == | 346 assert(Target->getAliasesForRegister(RegNum1)[RegNum2] == |
323 Target->getAliasesForRegister(RegNum2)[RegNum1]); | 347 Target->getAliasesForRegister(RegNum2)[RegNum1]); |
324 return Target->getAliasesForRegister(RegNum1)[RegNum2]; | 348 return Target->getAliasesForRegister(RegNum1)[RegNum2]; |
325 } | 349 } |
326 | 350 |
| 351 // Update NumPred for all Phi assignments using Var as their Dest variable. |
| 352 // Also update Weight if NumPred dropped from 1 to 0. |
| 353 void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) { |
| 354 for (PhiDesc &Item : Desc) { |
| 355 if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) { |
| 356 if (--Item.NumPred == 0) { |
| 357 Item.Weight += WeightNoPreds; |
| 358 } |
| 359 } |
| 360 } |
| 361 } |
| 362 |
327 } // end of anonymous namespace | 363 } // end of anonymous namespace |
328 | 364 |
329 // This the "advanced" version of Phi lowering for a basic block, in contrast | 365 // This the "advanced" version of Phi lowering for a basic block, in contrast |
330 // to the simple version that lowers through assignments involving temporaries. | 366 // to the simple version that lowers through assignments involving temporaries. |
331 // | 367 // |
332 // All Phi instructions in a basic block are conceptually executed in parallel. | 368 // All Phi instructions in a basic block are conceptually executed in parallel. |
333 // However, if we lower Phis early and commit to a sequential ordering, we may | 369 // However, if we lower Phis early and commit to a sequential ordering, we may |
334 // end up creating unnecessary interferences which lead to worse register | 370 // end up creating unnecessary interferences which lead to worse register |
335 // allocation. Delaying Phi scheduling until after register allocation can help | 371 // allocation. Delaying Phi scheduling until after register allocation can help |
336 // unless there are no free registers for shuffling registers or stack slots | 372 // unless there are no free registers for shuffling registers or stack slots |
(...skipping 24 matching lines...) Expand all Loading... |
361 // After phi assignments are lowered across all blocks, another register | 397 // After phi assignments are lowered across all blocks, another register |
362 // allocation pass is run, focusing only on pre-colored and infinite-weight | 398 // allocation pass is run, focusing only on pre-colored and infinite-weight |
363 // variables, similar to Om1 register allocation (except without the need to | 399 // variables, similar to Om1 register allocation (except without the need to |
364 // specially compute these variables' live ranges, since they have already been | 400 // specially compute these variables' live ranges, since they have already been |
365 // precisely calculated). The register allocator in this mode needs the ability | 401 // precisely calculated). The register allocator in this mode needs the ability |
366 // to forcibly spill and reload registers in case none are naturally available. | 402 // to forcibly spill and reload registers in case none are naturally available. |
367 void CfgNode::advancedPhiLowering() { | 403 void CfgNode::advancedPhiLowering() { |
368 if (getPhis().empty()) | 404 if (getPhis().empty()) |
369 return; | 405 return; |
370 | 406 |
371 // Count the number of non-deleted Phi instructions. | 407 PhiDescList Desc; |
372 struct PhiDesc { | |
373 InstPhi *Phi; | |
374 Variable *Dest; | |
375 Operand *Src; | |
376 bool Processed; | |
377 size_t NumPred; // number of entries whose Src is this Dest | |
378 int32_t Weight; // preference for topological order | |
379 }; | |
380 llvm::SmallVector<PhiDesc, 32> Desc(getPhis().size()); | |
381 | 408 |
382 size_t NumPhis = 0; | |
383 for (Inst &I : Phis) { | 409 for (Inst &I : Phis) { |
384 auto *Inst = llvm::dyn_cast<InstPhi>(&I); | 410 auto *Phi = llvm::dyn_cast<InstPhi>(&I); |
385 if (!Inst->isDeleted()) { | 411 if (!Phi->isDeleted()) { |
386 Variable *Dest = Inst->getDest(); | 412 Variable *Dest = Phi->getDest(); |
387 Desc[NumPhis].Phi = Inst; | 413 Desc.emplace_back(Phi, Dest); |
388 Desc[NumPhis].Dest = Dest; | |
389 ++NumPhis; | |
390 // Undo the effect of the phi instruction on this node's live-in set by | 414 // Undo the effect of the phi instruction on this node's live-in set by |
391 // marking the phi dest variable as live on entry. | 415 // marking the phi dest variable as live on entry. |
392 SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex()); | 416 SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex()); |
393 assert(!Func->getLiveness()->getLiveIn(this)[VarNum]); | 417 assert(!Func->getLiveness()->getLiveIn(this)[VarNum]); |
394 Func->getLiveness()->getLiveIn(this)[VarNum] = true; | 418 Func->getLiveness()->getLiveIn(this)[VarNum] = true; |
395 Inst->setDeleted(); | 419 Phi->setDeleted(); |
396 } | 420 } |
397 } | 421 } |
398 if (NumPhis == 0) | 422 if (Desc.empty()) |
399 return; | 423 return; |
400 | 424 |
401 TargetLowering *Target = Func->getTarget(); | 425 TargetLowering *Target = Func->getTarget(); |
402 SizeT InEdgeIndex = 0; | 426 SizeT InEdgeIndex = 0; |
403 for (CfgNode *Pred : InEdges) { | 427 for (CfgNode *Pred : InEdges) { |
404 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); | 428 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); |
405 SizeT Remaining = NumPhis; | 429 SizeT Remaining = Desc.size(); |
406 | 430 |
407 // First pass computes Src and initializes NumPred. | 431 // First pass computes Src and initializes NumPred. |
408 for (size_t I = 0; I < NumPhis; ++I) { | 432 for (PhiDesc &Item : Desc) { |
409 Variable *Dest = Desc[I].Dest; | 433 Variable *Dest = Item.Dest; |
410 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); | 434 Operand *Src = Item.Phi->getOperandForTarget(Pred); |
411 Desc[I].Src = Src; | 435 Item.Src = Src; |
412 Desc[I].Processed = false; | 436 Item.Processed = false; |
413 Desc[I].NumPred = 0; | 437 Item.NumPred = 0; |
414 // Cherry-pick any trivial assignments, so that they don't contribute to | 438 // Cherry-pick any trivial assignments, so that they don't contribute to |
415 // the running complexity of the topological sort. | 439 // the running complexity of the topological sort. |
416 if (sameVarOrReg(Target, Dest, Src)) { | 440 if (sameVarOrReg(Target, Dest, Src)) { |
417 Desc[I].Processed = true; | 441 Item.Processed = true; |
418 --Remaining; | 442 --Remaining; |
419 if (Dest != Src) | 443 if (Dest != Src) |
420 // If Dest and Src are syntactically the same, don't bother adding | 444 // If Dest and Src are syntactically the same, don't bother adding |
421 // the assignment, because in all respects it would be redundant, and | 445 // the assignment, because in all respects it would be redundant, and |
422 // if Dest/Src are on the stack, the target lowering may naively | 446 // if Dest/Src are on the stack, the target lowering may naively |
423 // decide to lower it using a temporary register. | 447 // decide to lower it using a temporary register. |
424 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 448 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
425 } | 449 } |
426 } | 450 } |
427 // Second pass computes NumPred by comparing every pair of Phi | 451 // Second pass computes NumPred by comparing every pair of Phi instructions. |
428 // instructions. | 452 for (PhiDesc &Item : Desc) { |
429 for (size_t I = 0; I < NumPhis; ++I) { | 453 if (Item.Processed) |
430 if (Desc[I].Processed) | |
431 continue; | 454 continue; |
432 const Variable *Dest = Desc[I].Dest; | 455 const Variable *Dest = Item.Dest; |
433 for (size_t J = 0; J < NumPhis; ++J) { | 456 for (PhiDesc &Item2 : Desc) { |
434 if (Desc[J].Processed) | 457 if (Item2.Processed) |
435 continue; | 458 continue; |
436 if (I != J) { | 459 // There shouldn't be two different Phis with the same Dest variable or |
437 // There shouldn't be two Phis with the same Dest variable or | |
438 // register. | 460 // register. |
439 assert(!sameVarOrReg(Target, Dest, Desc[J].Dest)); | 461 assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest)); |
440 } | 462 if (sameVarOrReg(Target, Dest, Item2.Src)) |
441 const Operand *Src = Desc[J].Src; | 463 ++Item.NumPred; |
442 if (sameVarOrReg(Target, Dest, Src)) | |
443 ++Desc[I].NumPred; | |
444 } | 464 } |
445 } | 465 } |
446 | 466 |
447 // Another pass to compute initial Weight values. | 467 // Another pass to compute initial Weight values. |
448 | 468 for (PhiDesc &Item : Desc) { |
449 // Always pick NumPred=0 over NumPred>0. | 469 if (Item.Processed) |
450 constexpr int32_t WeightNoPreds = 4; | |
451 // Prefer Src as a register because the register might free up. | |
452 constexpr int32_t WeightSrcIsReg = 2; | |
453 // Prefer Dest not as a register because the register stays free longer. | |
454 constexpr int32_t WeightDestNotReg = 1; | |
455 | |
456 for (size_t I = 0; I < NumPhis; ++I) { | |
457 if (Desc[I].Processed) | |
458 continue; | 470 continue; |
459 int32_t Weight = 0; | 471 int32_t Weight = 0; |
460 if (Desc[I].NumPred == 0) | 472 if (Item.NumPred == 0) |
461 Weight += WeightNoPreds; | 473 Weight += WeightNoPreds; |
462 if (auto *Var = llvm::dyn_cast<Variable>(Desc[I].Src)) | 474 if (auto *Var = llvm::dyn_cast<Variable>(Item.Src)) |
463 if (Var->hasReg()) | 475 if (Var->hasReg()) |
464 Weight += WeightSrcIsReg; | 476 Weight += WeightSrcIsReg; |
465 if (!Desc[I].Dest->hasReg()) | 477 if (!Item.Dest->hasReg()) |
466 Weight += WeightDestNotReg; | 478 Weight += WeightDestNotReg; |
467 Desc[I].Weight = Weight; | 479 Item.Weight = Weight; |
468 } | 480 } |
469 | 481 |
470 // Repeatedly choose and process the best candidate in the topological | 482 // Repeatedly choose and process the best candidate in the topological sort, |
471 // sort, until no candidates remain. This implementation is O(N^2) where N | 483 // until no candidates remain. This implementation is O(N^2) where N is the |
472 // is the number of Phi instructions, but with a small constant factor | 484 // number of Phi instructions, but with a small constant factor compared to |
473 // compared to a likely implementation of O(N) topological sort. | 485 // a likely implementation of O(N) topological sort. |
474 for (; Remaining; --Remaining) { | 486 for (; Remaining; --Remaining) { |
475 size_t BestIndex = 0; | |
476 int32_t BestWeight = -1; | 487 int32_t BestWeight = -1; |
| 488 PhiDesc *BestItem = nullptr; |
477 // Find the best candidate. | 489 // Find the best candidate. |
478 for (size_t I = 0; I < NumPhis; ++I) { | 490 for (PhiDesc &Item : Desc) { |
479 if (Desc[I].Processed) | 491 if (Item.Processed) |
480 continue; | 492 continue; |
481 int32_t Weight = 0; | 493 int32_t Weight = 0; |
482 Weight = Desc[I].Weight; | 494 Weight = Item.Weight; |
483 if (Weight > BestWeight) { | 495 if (Weight > BestWeight) { |
484 BestIndex = I; | 496 BestItem = &Item; |
485 BestWeight = Weight; | 497 BestWeight = Weight; |
486 } | 498 } |
487 } | 499 } |
488 assert(BestWeight >= 0); | 500 assert(BestWeight >= 0); |
489 assert(Desc[BestIndex].NumPred <= 1); | 501 assert(BestItem->NumPred <= 1); |
490 Variable *Dest = Desc[BestIndex].Dest; | 502 Variable *Dest = BestItem->Dest; |
491 Operand *Src = Desc[BestIndex].Src; | 503 Operand *Src = BestItem->Src; |
492 assert(!sameVarOrReg(Target, Dest, Src)); | 504 assert(!sameVarOrReg(Target, Dest, Src)); |
493 // Break a cycle by introducing a temporary. | 505 // Break a cycle by introducing a temporary. |
494 if (Desc[BestIndex].NumPred) { | 506 if (BestItem->NumPred) { |
495 bool Found = false; | 507 bool Found = false; |
496 // If the target instruction "A=B" is part of a cycle, find the "X=A" | 508 // If the target instruction "A=B" is part of a cycle, find the "X=A" |
497 // assignment in the cycle because it will have to be rewritten as | 509 // assignment in the cycle because it will have to be rewritten as |
498 // "X=tmp". | 510 // "X=tmp". |
499 for (size_t J = 0; !Found && J < NumPhis; ++J) { | 511 for (PhiDesc &Item : Desc) { |
500 if (Desc[J].Processed) | 512 if (Item.Processed) |
501 continue; | 513 continue; |
502 Operand *OtherSrc = Desc[J].Src; | 514 Operand *OtherSrc = Item.Src; |
503 if (Desc[J].NumPred && sameVarOrReg(Target, Dest, OtherSrc)) { | 515 if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) { |
504 SizeT VarNum = Func->getNumVariables(); | 516 SizeT VarNum = Func->getNumVariables(); |
505 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); | 517 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); |
506 if (BuildDefs::dump()) | 518 if (BuildDefs::dump()) |
507 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); | 519 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); |
508 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); | 520 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); |
509 Desc[J].Src = Tmp; | 521 Item.Src = Tmp; |
| 522 updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc)); |
510 Found = true; | 523 Found = true; |
| 524 break; |
511 } | 525 } |
512 } | 526 } |
513 assert(Found); | 527 assert(Found); |
514 } | 528 } |
515 // Now that a cycle (if any) has been broken, create the actual | 529 // Now that a cycle (if any) has been broken, create the actual |
516 // assignment. | 530 // assignment. |
517 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 531 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
518 // Update NumPred for all Phi assignments using this Phi's Src as their | 532 if (auto *Var = llvm::dyn_cast<Variable>(Src)) |
519 // Dest variable. Also update Weight if NumPred dropped from 1 to 0. | 533 updatePreds(Desc, Target, Var); |
520 if (auto *Var = llvm::dyn_cast<Variable>(Src)) { | 534 BestItem->Processed = true; |
521 for (size_t I = 0; I < NumPhis; ++I) { | |
522 if (Desc[I].Processed) | |
523 continue; | |
524 if (sameVarOrReg(Target, Var, Desc[I].Dest)) { | |
525 if (--Desc[I].NumPred == 0) | |
526 Desc[I].Weight += WeightNoPreds; | |
527 } | |
528 } | |
529 } | |
530 Desc[BestIndex].Processed = true; | |
531 } | 535 } |
532 Split->appendInst(InstBr::create(Func, this)); | 536 Split->appendInst(InstBr::create(Func, this)); |
533 | 537 |
534 Split->genCode(); | 538 Split->genCode(); |
535 Func->getVMetadata()->addNode(Split); | 539 Func->getVMetadata()->addNode(Split); |
| 540 // Validate to be safe. All items should be marked as processed, and have |
| 541 // no predecessors. |
| 542 if (BuildDefs::asserts()) { |
| 543 for (PhiDesc &Item : Desc) { |
| 544 (void)Item; |
| 545 assert(Item.Processed); |
| 546 assert(Item.NumPred == 0); |
| 547 } |
| 548 } |
536 } | 549 } |
537 } | 550 } |
538 | 551 |
539 // Does address mode optimization. Pass each instruction to the TargetLowering | 552 // Does address mode optimization. Pass each instruction to the TargetLowering |
540 // object. If it returns a new instruction (representing the optimized address | 553 // object. If it returns a new instruction (representing the optimized address |
541 // mode), then insert the new instruction and delete the old. | 554 // mode), then insert the new instruction and delete the old. |
542 void CfgNode::doAddressOpt() { | 555 void CfgNode::doAddressOpt() { |
543 TargetLowering *Target = Func->getTarget(); | 556 TargetLowering *Target = Func->getTarget(); |
544 LoweringContext &Context = Target->getContext(); | 557 LoweringContext &Context = Target->getContext(); |
545 Context.init(this); | 558 Context.init(this); |
(...skipping 846 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1392 InstIntrinsicCall *Inst = InstIntrinsicCall::create( | 1405 InstIntrinsicCall *Inst = InstIntrinsicCall::create( |
1393 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); | 1406 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); |
1394 Inst->addArg(AtomicRMWOp); | 1407 Inst->addArg(AtomicRMWOp); |
1395 Inst->addArg(Counter); | 1408 Inst->addArg(Counter); |
1396 Inst->addArg(One); | 1409 Inst->addArg(One); |
1397 Inst->addArg(OrderAcquireRelease); | 1410 Inst->addArg(OrderAcquireRelease); |
1398 Insts.push_front(Inst); | 1411 Insts.push_front(Inst); |
1399 } | 1412 } |
1400 | 1413 |
1401 } // end of namespace Ice | 1414 } // end of namespace Ice |
OLD | NEW |