OLD | NEW |
---|---|
1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// | 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
291 for (Inst &I : Pred->getInsts()) | 291 for (Inst &I : Pred->getInsts()) |
292 if (!I.isDeleted() && I.repointEdges(this, NewNode)) | 292 if (!I.isDeleted() && I.repointEdges(this, NewNode)) |
293 Found = true; | 293 Found = true; |
294 assert(Found); | 294 assert(Found); |
295 (void)Found; | 295 (void)Found; |
296 return NewNode; | 296 return NewNode; |
297 } | 297 } |
298 | 298 |
299 namespace { | 299 namespace { |
300 | 300 |
301 // Helper function used by advancedPhiLowering(). | 301 // Helpers for advancedPhiLowering(). |
302 | |
303 class PhiDesc { | |
304 PhiDesc &operator=(const PhiDesc &) = delete; | |
305 public: | |
306 PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {} | |
307 PhiDesc() = default; | |
308 PhiDesc(const PhiDesc &) = default; | |
John
2015/11/09 16:23:29
This struct is somewhat heavy for blindly allowing
Jim Stichnoth
2015/11/09 18:17:35
Done (in the new CL), and then deleted the default
| |
309 InstPhi *Phi = nullptr; | |
310 Variable *Dest = nullptr; | |
311 Operand *Src = nullptr; | |
312 bool Processed = false; | |
313 size_t NumPred = 0; // number of entries whose Src is this Dest | |
314 int32_t Weight = 0; // preference for topological order | |
315 }; | |
316 using PhiDescList = llvm::SmallVector<PhiDesc, 32>; | |
317 | |
318 // Always pick NumPred=0 over NumPred>0. | |
319 constexpr int32_t WeightNoPreds = 4; | |
320 // Prefer Src as a register because the register might free up. | |
321 constexpr int32_t WeightSrcIsReg = 2; | |
322 // Prefer Dest not as a register because the register stays free longer. | |
323 constexpr int32_t WeightDestNotReg = 1; | |
324 | |
302 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1, | 325 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1, |
303 const Operand *Opnd) { | 326 const Operand *Opnd) { |
304 if (Var1 == Opnd) | 327 if (Var1 == Opnd) |
305 return true; | 328 return true; |
306 const auto *Var2 = llvm::dyn_cast<Variable>(Opnd); | 329 const auto *Var2 = llvm::dyn_cast<Variable>(Opnd); |
307 if (Var2 == nullptr) | 330 if (Var2 == nullptr) |
308 return false; | 331 return false; |
309 | 332 |
310 // If either operand lacks a register, they cannot be the same. | 333 // If either operand lacks a register, they cannot be the same. |
311 if (!Var1->hasReg()) | 334 if (!Var1->hasReg()) |
312 return false; | 335 return false; |
313 if (!Var2->hasReg()) | 336 if (!Var2->hasReg()) |
314 return false; | 337 return false; |
315 | 338 |
316 int32_t RegNum1 = Var1->getRegNum(); | 339 int32_t RegNum1 = Var1->getRegNum(); |
317 int32_t RegNum2 = Var2->getRegNum(); | 340 int32_t RegNum2 = Var2->getRegNum(); |
318 // Quick common-case check. | 341 // Quick common-case check. |
319 if (RegNum1 == RegNum2) | 342 if (RegNum1 == RegNum2) |
320 return true; | 343 return true; |
321 | 344 |
322 assert(Target->getAliasesForRegister(RegNum1)[RegNum2] == | 345 assert(Target->getAliasesForRegister(RegNum1)[RegNum2] == |
323 Target->getAliasesForRegister(RegNum2)[RegNum1]); | 346 Target->getAliasesForRegister(RegNum2)[RegNum1]); |
324 return Target->getAliasesForRegister(RegNum1)[RegNum2]; | 347 return Target->getAliasesForRegister(RegNum1)[RegNum2]; |
325 } | 348 } |
326 | 349 |
350 // Update NumPred for all Phi assignments using Var as their Dest variable. | |
351 // Also update Weight if NumPred dropped from 1 to 0. | |
352 void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) { | |
353 for (PhiDesc &Item : Desc) { | |
354 if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) { | |
355 if (--Item.NumPred == 0) { | |
356 Item.Weight += WeightNoPreds; | |
357 } | |
358 } | |
359 } | |
360 } | |
361 | |
327 } // end of anonymous namespace | 362 } // end of anonymous namespace |
328 | 363 |
329 // This the "advanced" version of Phi lowering for a basic block, in contrast | 364 // This the "advanced" version of Phi lowering for a basic block, in contrast |
330 // to the simple version that lowers through assignments involving temporaries. | 365 // to the simple version that lowers through assignments involving temporaries. |
331 // | 366 // |
332 // All Phi instructions in a basic block are conceptually executed in parallel. | 367 // All Phi instructions in a basic block are conceptually executed in parallel. |
333 // However, if we lower Phis early and commit to a sequential ordering, we may | 368 // However, if we lower Phis early and commit to a sequential ordering, we may |
334 // end up creating unnecessary interferences which lead to worse register | 369 // end up creating unnecessary interferences which lead to worse register |
335 // allocation. Delaying Phi scheduling until after register allocation can help | 370 // allocation. Delaying Phi scheduling until after register allocation can help |
336 // unless there are no free registers for shuffling registers or stack slots | 371 // unless there are no free registers for shuffling registers or stack slots |
(...skipping 24 matching lines...) Expand all Loading... | |
361 // After phi assignments are lowered across all blocks, another register | 396 // After phi assignments are lowered across all blocks, another register |
362 // allocation pass is run, focusing only on pre-colored and infinite-weight | 397 // allocation pass is run, focusing only on pre-colored and infinite-weight |
363 // variables, similar to Om1 register allocation (except without the need to | 398 // variables, similar to Om1 register allocation (except without the need to |
364 // specially compute these variables' live ranges, since they have already been | 399 // specially compute these variables' live ranges, since they have already been |
365 // precisely calculated). The register allocator in this mode needs the ability | 400 // precisely calculated). The register allocator in this mode needs the ability |
366 // to forcibly spill and reload registers in case none are naturally available. | 401 // to forcibly spill and reload registers in case none are naturally available. |
367 void CfgNode::advancedPhiLowering() { | 402 void CfgNode::advancedPhiLowering() { |
368 if (getPhis().empty()) | 403 if (getPhis().empty()) |
369 return; | 404 return; |
370 | 405 |
371 // Count the number of non-deleted Phi instructions. | 406 PhiDescList Desc; |
372 struct PhiDesc { | |
373 InstPhi *Phi; | |
374 Variable *Dest; | |
375 Operand *Src; | |
376 bool Processed; | |
377 size_t NumPred; // number of entries whose Src is this Dest | |
378 int32_t Weight; // preference for topological order | |
379 }; | |
380 llvm::SmallVector<PhiDesc, 32> Desc(getPhis().size()); | |
381 | 407 |
382 size_t NumPhis = 0; | |
383 for (Inst &I : Phis) { | 408 for (Inst &I : Phis) { |
384 auto *Inst = llvm::dyn_cast<InstPhi>(&I); | 409 auto *Phi = llvm::dyn_cast<InstPhi>(&I); |
385 if (!Inst->isDeleted()) { | 410 if (!Phi->isDeleted()) { |
386 Variable *Dest = Inst->getDest(); | 411 Variable *Dest = Phi->getDest(); |
387 Desc[NumPhis].Phi = Inst; | 412 Desc.emplace_back(PhiDesc(Phi, Dest)); |
John
2015/11/09 16:23:29
You should use
Desc.emplace(Phi, Dest);
instead.
Jim Stichnoth
2015/11/09 18:17:35
Unfortunately, llvm::SmallVector doesn't provide a
| |
388 Desc[NumPhis].Dest = Dest; | |
389 ++NumPhis; | |
390 // Undo the effect of the phi instruction on this node's live-in set by | 413 // Undo the effect of the phi instruction on this node's live-in set by |
391 // marking the phi dest variable as live on entry. | 414 // marking the phi dest variable as live on entry. |
392 SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex()); | 415 SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex()); |
393 assert(!Func->getLiveness()->getLiveIn(this)[VarNum]); | 416 assert(!Func->getLiveness()->getLiveIn(this)[VarNum]); |
394 Func->getLiveness()->getLiveIn(this)[VarNum] = true; | 417 Func->getLiveness()->getLiveIn(this)[VarNum] = true; |
395 Inst->setDeleted(); | 418 Phi->setDeleted(); |
396 } | 419 } |
397 } | 420 } |
398 if (NumPhis == 0) | 421 if (Desc.empty()) |
399 return; | 422 return; |
400 | 423 |
401 TargetLowering *Target = Func->getTarget(); | 424 TargetLowering *Target = Func->getTarget(); |
402 SizeT InEdgeIndex = 0; | 425 SizeT InEdgeIndex = 0; |
403 for (CfgNode *Pred : InEdges) { | 426 for (CfgNode *Pred : InEdges) { |
404 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); | 427 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); |
405 SizeT Remaining = NumPhis; | 428 SizeT Remaining = Desc.size(); |
406 | 429 |
407 // First pass computes Src and initializes NumPred. | 430 // First pass computes Src and initializes NumPred. |
408 for (size_t I = 0; I < NumPhis; ++I) { | 431 for (PhiDesc &Item : Desc) { |
409 Variable *Dest = Desc[I].Dest; | 432 Variable *Dest = Item.Dest; |
410 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); | 433 Operand *Src = Item.Phi->getOperandForTarget(Pred); |
411 Desc[I].Src = Src; | 434 Item.Src = Src; |
412 Desc[I].Processed = false; | 435 Item.Processed = false; |
413 Desc[I].NumPred = 0; | 436 Item.NumPred = 0; |
414 // Cherry-pick any trivial assignments, so that they don't contribute to | 437 // Cherry-pick any trivial assignments, so that they don't contribute to |
415 // the running complexity of the topological sort. | 438 // the running complexity of the topological sort. |
416 if (sameVarOrReg(Target, Dest, Src)) { | 439 if (sameVarOrReg(Target, Dest, Src)) { |
417 Desc[I].Processed = true; | 440 Item.Processed = true; |
418 --Remaining; | 441 --Remaining; |
419 if (Dest != Src) | 442 if (Dest != Src) |
420 // If Dest and Src are syntactically the same, don't bother adding | 443 // If Dest and Src are syntactically the same, don't bother adding |
421 // the assignment, because in all respects it would be redundant, and | 444 // the assignment, because in all respects it would be redundant, and |
422 // if Dest/Src are on the stack, the target lowering may naively | 445 // if Dest/Src are on the stack, the target lowering may naively |
423 // decide to lower it using a temporary register. | 446 // decide to lower it using a temporary register. |
424 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 447 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
425 } | 448 } |
426 } | 449 } |
427 // Second pass computes NumPred by comparing every pair of Phi | 450 // Second pass computes NumPred by comparing every pair of Phi instructions. |
428 // instructions. | 451 for (PhiDesc &Item : Desc) { |
429 for (size_t I = 0; I < NumPhis; ++I) { | 452 if (Item.Processed) |
430 if (Desc[I].Processed) | |
431 continue; | 453 continue; |
432 const Variable *Dest = Desc[I].Dest; | 454 const Variable *Dest = Item.Dest; |
433 for (size_t J = 0; J < NumPhis; ++J) { | 455 for (PhiDesc &Item2 : Desc) { |
434 if (Desc[J].Processed) | 456 if (Item2.Processed) |
435 continue; | 457 continue; |
436 if (I != J) { | 458 // There shouldn't be two different Phis with the same Dest variable or |
437 // There shouldn't be two Phis with the same Dest variable or | 459 // register. |
438 // register. | 460 assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest)); |
439 assert(!sameVarOrReg(Target, Dest, Desc[J].Dest)); | 461 if (sameVarOrReg(Target, Dest, Item2.Src)) |
440 } | 462 ++Item.NumPred; |
441 const Operand *Src = Desc[J].Src; | |
442 if (sameVarOrReg(Target, Dest, Src)) | |
443 ++Desc[I].NumPred; | |
444 } | 463 } |
445 } | 464 } |
446 | 465 |
447 // Another pass to compute initial Weight values. | 466 // Another pass to compute initial Weight values. |
448 | 467 for (PhiDesc &Item : Desc) { |
449 // Always pick NumPred=0 over NumPred>0. | 468 if (Item.Processed) |
450 constexpr int32_t WeightNoPreds = 4; | |
451 // Prefer Src as a register because the register might free up. | |
452 constexpr int32_t WeightSrcIsReg = 2; | |
453 // Prefer Dest not as a register because the register stays free longer. | |
454 constexpr int32_t WeightDestNotReg = 1; | |
455 | |
456 for (size_t I = 0; I < NumPhis; ++I) { | |
457 if (Desc[I].Processed) | |
458 continue; | 469 continue; |
459 int32_t Weight = 0; | 470 int32_t Weight = 0; |
460 if (Desc[I].NumPred == 0) | 471 if (Item.NumPred == 0) |
461 Weight += WeightNoPreds; | 472 Weight += WeightNoPreds; |
462 if (auto *Var = llvm::dyn_cast<Variable>(Desc[I].Src)) | 473 if (auto *Var = llvm::dyn_cast<Variable>(Item.Src)) |
463 if (Var->hasReg()) | 474 if (Var->hasReg()) |
464 Weight += WeightSrcIsReg; | 475 Weight += WeightSrcIsReg; |
465 if (!Desc[I].Dest->hasReg()) | 476 if (!Item.Dest->hasReg()) |
466 Weight += WeightDestNotReg; | 477 Weight += WeightDestNotReg; |
467 Desc[I].Weight = Weight; | 478 Item.Weight = Weight; |
468 } | 479 } |
469 | 480 |
470 // Repeatedly choose and process the best candidate in the topological | 481 // Repeatedly choose and process the best candidate in the topological sort, |
471 // sort, until no candidates remain. This implementation is O(N^2) where N | 482 // until no candidates remain. This implementation is O(N^2) where N is the |
472 // is the number of Phi instructions, but with a small constant factor | 483 // number of Phi instructions, but with a small constant factor compared to |
473 // compared to a likely implementation of O(N) topological sort. | 484 // a likely implementation of O(N) topological sort. |
474 for (; Remaining; --Remaining) { | 485 for (; Remaining; --Remaining) { |
475 size_t BestIndex = 0; | |
476 int32_t BestWeight = -1; | 486 int32_t BestWeight = -1; |
487 PhiDesc *BestItem = nullptr; | |
477 // Find the best candidate. | 488 // Find the best candidate. |
478 for (size_t I = 0; I < NumPhis; ++I) { | 489 for (PhiDesc &Item : Desc) { |
479 if (Desc[I].Processed) | 490 if (Item.Processed) |
480 continue; | 491 continue; |
481 int32_t Weight = 0; | 492 int32_t Weight = 0; |
482 Weight = Desc[I].Weight; | 493 Weight = Item.Weight; |
483 if (Weight > BestWeight) { | 494 if (Weight > BestWeight) { |
484 BestIndex = I; | 495 BestItem = &Item; |
485 BestWeight = Weight; | 496 BestWeight = Weight; |
486 } | 497 } |
487 } | 498 } |
488 assert(BestWeight >= 0); | 499 assert(BestWeight >= 0); |
489 assert(Desc[BestIndex].NumPred <= 1); | 500 assert(BestItem->NumPred <= 1); |
490 Variable *Dest = Desc[BestIndex].Dest; | 501 Variable *Dest = BestItem->Dest; |
491 Operand *Src = Desc[BestIndex].Src; | 502 Operand *Src = BestItem->Src; |
492 assert(!sameVarOrReg(Target, Dest, Src)); | 503 assert(!sameVarOrReg(Target, Dest, Src)); |
493 // Break a cycle by introducing a temporary. | 504 // Break a cycle by introducing a temporary. |
494 if (Desc[BestIndex].NumPred) { | 505 if (BestItem->NumPred) { |
495 bool Found = false; | 506 bool Found = false; |
496 // If the target instruction "A=B" is part of a cycle, find the "X=A" | 507 // If the target instruction "A=B" is part of a cycle, find the "X=A" |
497 // assignment in the cycle because it will have to be rewritten as | 508 // assignment in the cycle because it will have to be rewritten as |
498 // "X=tmp". | 509 // "X=tmp". |
499 for (size_t J = 0; !Found && J < NumPhis; ++J) { | 510 for (PhiDesc &Item : Desc) { |
500 if (Desc[J].Processed) | 511 if (Item.Processed) |
501 continue; | 512 continue; |
502 Operand *OtherSrc = Desc[J].Src; | 513 Operand *OtherSrc = Item.Src; |
503 if (Desc[J].NumPred && sameVarOrReg(Target, Dest, OtherSrc)) { | 514 if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) { |
504 SizeT VarNum = Func->getNumVariables(); | 515 SizeT VarNum = Func->getNumVariables(); |
505 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); | 516 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); |
506 if (BuildDefs::dump()) | 517 if (BuildDefs::dump()) |
507 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); | 518 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); |
508 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); | 519 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); |
509 Desc[J].Src = Tmp; | 520 Item.Src = Tmp; |
521 updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc)); | |
Jim Stichnoth
2015/11/08 17:45:58
This fixes the advanced phi lowering bug mentioned
John
2015/11/09 16:23:29
optional: Would you mind sending this in a separat
Jim Stichnoth
2015/11/09 18:17:35
Done.
| |
510 Found = true; | 522 Found = true; |
523 break; | |
511 } | 524 } |
512 } | 525 } |
513 assert(Found); | 526 assert(Found); |
514 } | 527 } |
515 // Now that a cycle (if any) has been broken, create the actual | 528 // Now that a cycle (if any) has been broken, create the actual |
516 // assignment. | 529 // assignment. |
517 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 530 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
518 // Update NumPred for all Phi assignments using this Phi's Src as their | 531 if (auto *Var = llvm::dyn_cast<Variable>(Src)) |
519 // Dest variable. Also update Weight if NumPred dropped from 1 to 0. | 532 updatePreds(Desc, Target, Var); |
520 if (auto *Var = llvm::dyn_cast<Variable>(Src)) { | 533 BestItem->Processed = true; |
521 for (size_t I = 0; I < NumPhis; ++I) { | |
522 if (Desc[I].Processed) | |
523 continue; | |
524 if (sameVarOrReg(Target, Var, Desc[I].Dest)) { | |
525 if (--Desc[I].NumPred == 0) | |
526 Desc[I].Weight += WeightNoPreds; | |
527 } | |
528 } | |
529 } | |
530 Desc[BestIndex].Processed = true; | |
531 } | 534 } |
532 Split->appendInst(InstBr::create(Func, this)); | 535 Split->appendInst(InstBr::create(Func, this)); |
533 | 536 |
534 Split->genCode(); | 537 Split->genCode(); |
535 Func->getVMetadata()->addNode(Split); | 538 Func->getVMetadata()->addNode(Split); |
539 // Validate to be safe. All items should be marked as processed, and have | |
540 // no predecessors. | |
541 if (BuildDefs::asserts()) { | |
542 for (PhiDesc &Item : Desc) { | |
543 (void)Item; | |
544 assert(Item.Processed); | |
545 assert(Item.NumPred == 0); | |
546 } | |
547 } | |
536 } | 548 } |
537 } | 549 } |
538 | 550 |
539 // Does address mode optimization. Pass each instruction to the TargetLowering | 551 // Does address mode optimization. Pass each instruction to the TargetLowering |
540 // object. If it returns a new instruction (representing the optimized address | 552 // object. If it returns a new instruction (representing the optimized address |
541 // mode), then insert the new instruction and delete the old. | 553 // mode), then insert the new instruction and delete the old. |
542 void CfgNode::doAddressOpt() { | 554 void CfgNode::doAddressOpt() { |
543 TargetLowering *Target = Func->getTarget(); | 555 TargetLowering *Target = Func->getTarget(); |
544 LoweringContext &Context = Target->getContext(); | 556 LoweringContext &Context = Target->getContext(); |
545 Context.init(this); | 557 Context.init(this); |
(...skipping 846 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1392 InstIntrinsicCall *Inst = InstIntrinsicCall::create( | 1404 InstIntrinsicCall *Inst = InstIntrinsicCall::create( |
1393 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); | 1405 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); |
1394 Inst->addArg(AtomicRMWOp); | 1406 Inst->addArg(AtomicRMWOp); |
1395 Inst->addArg(Counter); | 1407 Inst->addArg(Counter); |
1396 Inst->addArg(One); | 1408 Inst->addArg(One); |
1397 Inst->addArg(OrderAcquireRelease); | 1409 Inst->addArg(OrderAcquireRelease); |
1398 Insts.push_front(Inst); | 1410 Insts.push_front(Inst); |
1399 } | 1411 } |
1400 | 1412 |
1401 } // end of namespace Ice | 1413 } // end of namespace Ice |
OLD | NEW |