Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(159)

Side by Side Diff: src/IceTargetLoweringARM32.h

Issue 1417393003: Subzero. ARM32. New bool folding. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Adds --force to check-spec Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTLS.h ('k') | src/IceTargetLoweringARM32.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// 1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
128 bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const { 128 bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
129 return CPUFeatures.hasFeature(I); 129 return CPUFeatures.hasFeature(I);
130 } 130 }
131 Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister); 131 Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
132 132
133 protected: 133 protected:
134 explicit TargetARM32(Cfg *Func); 134 explicit TargetARM32(Cfg *Func);
135 135
136 void postLower() override; 136 void postLower() override;
137 137
138 enum SafeBoolChain {
139 SBC_No,
140 SBC_Yes,
141 };
142
138 void lowerAlloca(const InstAlloca *Inst) override; 143 void lowerAlloca(const InstAlloca *Inst) override;
144 SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Inst);
139 void lowerArithmetic(const InstArithmetic *Inst) override; 145 void lowerArithmetic(const InstArithmetic *Inst) override;
140 void lowerAssign(const InstAssign *Inst) override; 146 void lowerAssign(const InstAssign *Inst) override;
141 void lowerBr(const InstBr *Inst) override; 147 void lowerBr(const InstBr *Inst) override;
142 void lowerCall(const InstCall *Inst) override; 148 void lowerCall(const InstCall *Inst) override;
143 void lowerCast(const InstCast *Inst) override; 149 void lowerCast(const InstCast *Inst) override;
144 void lowerExtractElement(const InstExtractElement *Inst) override; 150 void lowerExtractElement(const InstExtractElement *Inst) override;
145 void lowerFcmpCond(const InstFcmp *Instr, CondARM32::Cond *CondIfTrue0, 151
146 CondARM32::Cond *CondIfTrue1, 152 /// CondWhenTrue is a helper type returned by every method in the lowering
147 CondARM32::Cond *CondIfFalse); 153 /// that emits code to set the condition codes.
154 class CondWhenTrue {
155 public:
156 explicit CondWhenTrue(CondARM32::Cond T0,
157 CondARM32::Cond T1 = CondARM32::kNone)
158 : WhenTrue0(T0), WhenTrue1(T1) {
159 assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
160 assert(T1 != T0 || T0 == CondARM32::kNone);
161 }
162 CondARM32::Cond WhenTrue0;
163 CondARM32::Cond WhenTrue1;
164
165 /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
166 CondWhenTrue invert() const {
167 switch (WhenTrue0) {
168 default:
169 if (WhenTrue1 == CondARM32::kNone)
170 return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
171 return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
172 InstARM32::getOppositeCondition(WhenTrue1));
173 case CondARM32::AL:
174 return CondWhenTrue(CondARM32::kNone);
175 case CondARM32::kNone:
176 return CondWhenTrue(CondARM32::AL);
177 }
178 }
179 };
180
181 CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
148 void lowerFcmp(const InstFcmp *Instr) override; 182 void lowerFcmp(const InstFcmp *Instr) override;
149 void lowerIcmpCond(const InstIcmp *Instr, CondARM32::Cond *CondIfTrue, 183 CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
150 CondARM32::Cond *CondIfFalse);
151 void lowerIcmp(const InstIcmp *Instr) override; 184 void lowerIcmp(const InstIcmp *Instr) override;
152 void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr, 185 void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
153 Operand *Val); 186 Operand *Val);
154 void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override; 187 void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override;
155 void lowerInsertElement(const InstInsertElement *Inst) override; 188 void lowerInsertElement(const InstInsertElement *Inst) override;
156 void lowerLoad(const InstLoad *Inst) override; 189 void lowerLoad(const InstLoad *Inst) override;
157 void lowerPhi(const InstPhi *Inst) override; 190 void lowerPhi(const InstPhi *Inst) override;
158 void lowerRet(const InstRet *Inst) override; 191 void lowerRet(const InstRet *Inst) override;
159 void lowerSelect(const InstSelect *Inst) override; 192 void lowerSelect(const InstSelect *Inst) override;
160 void lowerStore(const InstStore *Inst) override; 193 void lowerStore(const InstStore *Inst) override;
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after
327 Instr->setDestRedefined(); 360 Instr->setDestRedefined();
328 Context.insert(Instr); 361 Context.insert(Instr);
329 if (Instr->isMultiDest()) { 362 if (Instr->isMultiDest()) {
330 // If Instr is multi-dest, then Dest must be a Variable64On32. We add a 363 // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
331 // fake-def for Instr.DestHi here. 364 // fake-def for Instr.DestHi here.
332 assert(llvm::isa<Variable64On32>(Dest)); 365 assert(llvm::isa<Variable64On32>(Dest));
333 Context.insert(InstFakeDef::create(Func, Instr->getDestHi())); 366 Context.insert(InstFakeDef::create(Func, Instr->getDestHi()));
334 } 367 }
335 } 368 }
336 369
337 // _mov_i1_to_flags is used for bool folding. If "Boolean" is folded, this 370 // --------------------------------------------------------------------------
338 // method returns true, and sets "CondIfTrue0" and "CondIfTrue1" to the 371 // Begin bool folding machinery.
339 // appropriate ARM condition codes. If "Boolean" is not to be folded, then 372 //
340 // this method returns false. 373 // There are three types of boolean lowerings handled by this target:
341 bool _mov_i1_to_flags(Operand *Boolean, CondARM32::Cond *CondIfTrue0, 374 //
342 CondARM32::Cond *CondIfTrue1, 375 // 1) Boolean expressions leading to a boolean Variable definition
343 CondARM32::Cond *CondIfFalse); 376 // ---------------------------------------------------------------
344 377 //
345 // _cmov is a pseudo instruction that is used for boolean folding. It emits 378 // Whenever a i1 Variable is live out (i.e., its live range extends beyond
346 // code that moves "SrcIfTrue" to dest if either "CondIfTrue0" or 379 // the defining basic block) we do not fold the operation. We instead
347 // "CondIfTrue1" holds, and "SrcIfFalse", if "CondIfFalse" holds. It requires 380 // materialize (i.e., compute) the variable normally, so that it can be used
348 // "Dest" to be an infinite-weight temporary. 381 // when needed. We also materialize i1 values that are not single use to
349 void _cmov(Variable *Dest, Operand *SrcIfTrue, CondARM32::Cond CondIfTrue0, 382 // avoid code duplication. These expressions are not short circuited.
350 CondARM32::Cond CondIfTrue1, Operand *SrcIfFalse, 383 //
351 CondARM32::Cond CondIfFalse) { 384 // 2) Boolean expressions leading to a select
352 assert(Dest->mustHaveReg()); 385 // ------------------------------------------
353 386 //
354 if (CondIfFalse == CondARM32::kNone) { 387 // These include boolean chains leading to a select instruction, as well as
355 assert(CondIfTrue0 == CondARM32::AL); 388 // i1 Sexts. These boolean expressions are lowered to:
356 assert(CondIfTrue1 == CondARM32::kNone); 389 //
357 } 390 // mov T, <false value>
358 391 // CC <- eval(Boolean Expression)
359 if (CondIfTrue0 == CondARM32::kNone) { 392 // movCC T, <true value>
360 assert(CondIfFalse == CondARM32::AL); 393 //
361 assert(CondIfTrue1 == CondARM32::kNone); 394 // For Sexts, <false value> is 0, and <true value> is -1.
362 } 395 //
363 396 // 3) Boolean expressions leading to a br i1
364 if (CondIfTrue1 != CondARM32::kNone) { 397 // -----------------------------------------
365 assert(CondIfFalse == CondARM32::AL); 398 //
366 assert(CondIfTrue1 != CondARM32::kNone); 399 // These are the boolean chains leading to a branch. These chains are
367 } 400 // short-circuited, i.e.:
368 401 //
369 bool RedefineT = false; 402 // A = or i1 B, C
370 if (CondIfFalse != CondARM32::kNone) { 403 // br i1 A, label %T, label %F
371 _mov(Dest, SrcIfFalse, CondIfFalse); 404 //
372 RedefineT = true; 405 // becomes
373 } 406 //
374 407 // tst B
375 if (CondIfTrue0 != CondARM32::kNone) { 408 // jne %T
376 if (RedefineT) { 409 // tst C
377 _mov_redefined(Dest, SrcIfTrue, CondIfTrue0); 410 // jne %T
378 } else { 411 // j %F
379 _mov(Dest, SrcIfTrue, CondIfTrue0); 412 //
380 } 413 // and
381 RedefineT = true; 414 //
382 } 415 // A = and i1 B, C
383 416 // br i1 A, label %T, label %F
384 if (CondIfTrue1 != CondARM32::kNone) { 417 //
385 assert(RedefineT); 418 // becomes
386 _mov_redefined(Dest, SrcIfTrue, CondIfTrue1); 419 //
387 } 420 // tst B
421 // jeq %F
422 // tst C
423 // jeq %F
424 // j %T
425 //
426 // Arbitrarily long chains are short circuited, e.g.
427 //
428 // A = or i1 B, C
429 // D = and i1 A, E
430 // F = and i1 G, H
431 // I = or i1 D, F
432 // br i1 I, label %True, label %False
433 //
434 // becomes
435 //
436 // Label[A]:
437 // tst B, 1
438 // bne Label[D]
439 // tst C, 1
440 // beq Label[I]
441 // Label[D]:
442 // tst E, 1
443 // bne %True
444 // Label[I]:
445 // tst G, 1
446 // beq %False
447 // tst H, 1
448 // beq %False (bne %True)
449
450 /// lowerInt1 materializes Boolean to a Variable.
451 SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);
452
453 /// lowerInt1ForSelect generates the following instruction sequence:
454 ///
455 /// mov T, FalseValue
456 /// CC <- eval(Boolean)
457 /// movCC T, TrueValue
458 /// mov Dest, T
459 ///
460 /// It is used for lowering select i1, as well as i1 Sext.
461 void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
462 Operand *FalseValue);
463
464 /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode, or
465 /// an InstARM32Label (but never both) so that, during br i1 lowering, we can
466 /// create auxiliary labels for short circuiting the condition evaluation.
467 class LowerInt1BranchTarget {
468 public:
469 explicit LowerInt1BranchTarget(CfgNode *const Target)
470 : NodeTarget(Target) {}
471 explicit LowerInt1BranchTarget(InstARM32Label *const Target)
472 : LabelTarget(Target) {}
473
474 /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
475 /// is the exact copy of this if Label is nullptr; otherwise, the returned
476 /// object will wrap Label instead.
477 LowerInt1BranchTarget
478 createForLabelOrDuplicate(InstARM32Label *Label) const {
479 if (Label != nullptr)
480 return LowerInt1BranchTarget(Label);
481 if (NodeTarget)
482 return LowerInt1BranchTarget(NodeTarget);
483 return LowerInt1BranchTarget(LabelTarget);
484 }
485
486 CfgNode *const NodeTarget = nullptr;
487 InstARM32Label *const LabelTarget = nullptr;
488 };
489
490 /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch for
491 /// determining which type of arithmetic is allowed to be short circuited. This
492 /// is useful for lowering
493 ///
494 /// t1 = and i1 A, B
495 /// t2 = and i1 t1, C
496 /// br i1 t2, label %True, label %False
497 ///
498 /// to
499 ///
500 /// tst A, 1
501 /// beq %False
502 /// tst B, 1
503 /// beq %False
504 /// tst C, 1
505 /// bne %True
506 /// b %False
507 ///
508 /// Without this information, short circuiting would only allow to short
509 /// circuit a single high level instruction. For example:
510 ///
511 /// t1 = or i1 A, B
512 /// t2 = and i1 t1, C
513 /// br i1 t2, label %True, label %False
514 ///
515 /// cannot be lowered to
516 ///
517 /// tst A, 1
518 /// bne %True
519 /// tst B, 1
520 /// bne %True
521 /// tst C, 1
522 /// bne %True
523 /// b %False
524 ///
525 /// It needs to be lowered to
526 ///
527 /// tst A, 1
528 /// bne Aux
529 /// tst B, 1
530 /// beq %False
531 /// Aux:
532 /// tst C, 1
533 /// bne %True
534 /// b %False
535 ///
536 /// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it
537 /// might.)
538 enum LowerInt1AllowShortCircuit {
539 SC_And = 1,
540 SC_Or = 2,
541 SC_All = SC_And | SC_Or,
542 };
543
544 /// ShortCircuitCondAndLabel wraps the condition codes that should be used
545 /// after a lowerInt1ForBranch returns to branch to the
546 /// TrueTarget/FalseTarget. If ShortCircuitLabel is not nullptr, then the
547 /// called lowerInt1ForBranch created an internal (i.e., short-circuit) label
548 /// used for short circuiting.
549 class ShortCircuitCondAndLabel {
550 public:
551 explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
552 InstARM32Label *L = nullptr)
553 : Cond(std::move(C)), ShortCircuitTarget(L) {}
554 const CondWhenTrue Cond;
555 InstARM32Label *const ShortCircuitTarget;
556
557 CondWhenTrue assertNoLabelAndReturnCond() const {
558 assert(ShortCircuitTarget == nullptr);
559 return Cond;
560 }
561 };
562
563 /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
564 /// are to be used for branching to the branch's TrueTarget. It may return a
565 /// label that the expansion of Boolean used to short circuit the chain's
566 /// evaluation.
567 ShortCircuitCondAndLabel
568 lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
569 const LowerInt1BranchTarget &TargetFalse,
570 uint32_t ShortCircuitable);
571
572 // _br is a convenience wrapper that emits br instructions to BrTarget.
573 void _br(const LowerInt1BranchTarget &BrTarget,
574 CondARM32::Cond Cond = CondARM32::AL) {
575 assert((BrTarget.NodeTarget == nullptr) !=
576 (BrTarget.LabelTarget == nullptr));
577 if (BrTarget.NodeTarget != nullptr)
578 _br(BrTarget.NodeTarget, Cond);
579 else
580 _br(BrTarget.LabelTarget, Cond);
388 } 581 }
389 582
583 // _br_short_circuit is used when lowering InstArithmetic::And and
584 // InstArithmetic::Or and a short circuit branch is needed.
585 void _br_short_circuit(const LowerInt1BranchTarget &Target,
586 const CondWhenTrue &Cond) {
587 if (Cond.WhenTrue1 != CondARM32::kNone) {
588 _br(Target, Cond.WhenTrue1);
589 }
590 if (Cond.WhenTrue0 != CondARM32::kNone) {
591 _br(Target, Cond.WhenTrue0);
592 }
593 }
594 // End of bool folding machinery
595 // --------------------------------------------------------------------------
596
390 /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with 597 /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
391 /// an upper16 relocation). 598 /// an upper16 relocation).
392 void _movt(Variable *Dest, Operand *Src0, 599 void _movt(Variable *Dest, Operand *Src0,
393 CondARM32::Cond Pred = CondARM32::AL) { 600 CondARM32::Cond Pred = CondARM32::AL) {
394 Context.insert(InstARM32Movt::create(Func, Dest, Src0, Pred)); 601 Context.insert(InstARM32Movt::create(Func, Dest, Src0, Pred));
395 } 602 }
396 void _movw(Variable *Dest, Operand *Src0, 603 void _movw(Variable *Dest, Operand *Src0,
397 CondARM32::Cond Pred = CondARM32::AL) { 604 CondARM32::Cond Pred = CondARM32::AL) {
398 Context.insert(InstARM32Movw::create(Func, Dest, Src0, Pred)); 605 Context.insert(InstARM32Movw::create(Func, Dest, Src0, Pred));
399 } 606 }
(...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after
621 llvm::SmallBitVector ValidF64Regs; 828 llvm::SmallBitVector ValidF64Regs;
622 llvm::SmallBitVector ValidV128Regs; 829 llvm::SmallBitVector ValidV128Regs;
623 }; 830 };
624 831
625 private: 832 private:
626 ~TargetARM32() override = default; 833 ~TargetARM32() override = default;
627 834
628 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt, 835 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
629 Operand *Base); 836 Operand *Base);
630 837
631 void lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue,
632 CondARM32::Cond *CondIfFalse);
633
634 class BoolComputationTracker { 838 class BoolComputationTracker {
635 public: 839 public:
636 BoolComputationTracker() = default; 840 BoolComputationTracker() = default;
637 ~BoolComputationTracker() = default; 841 ~BoolComputationTracker() = default;
638 842
639 void forgetProducers() { KnownComputations.clear(); } 843 void forgetProducers() { KnownComputations.clear(); }
640 void recordProducers(CfgNode *Node); 844 void recordProducers(CfgNode *Node);
641 845
642 const Inst *getProducerOf(const Operand *Opnd) const { 846 const Inst *getProducerOf(const Operand *Opnd) const {
643 auto *Var = llvm::dyn_cast<Variable>(Opnd); 847 auto *Var = llvm::dyn_cast<Variable>(Opnd);
644 if (Var == nullptr) { 848 if (Var == nullptr) {
645 return nullptr; 849 return nullptr;
646 } 850 }
647 851
648 auto Iter = KnownComputations.find(Var->getIndex()); 852 auto Iter = KnownComputations.find(Var->getIndex());
649 if (Iter == KnownComputations.end()) { 853 if (Iter == KnownComputations.end()) {
650 return nullptr; 854 return nullptr;
651 } 855 }
652 856
653 return Iter->second.Instr; 857 return Iter->second.Instr;
654 } 858 }
655 859
656 void dump(const Cfg *Func) const { 860 void dump(const Cfg *Func) const {
657 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding)) 861 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
658 return; 862 return;
659 OstreamLocker L(Func->getContext()); 863 OstreamLocker L(Func->getContext());
660 Ostream &Str = Func->getContext()->getStrDump(); 864 Ostream &Str = Func->getContext()->getStrDump();
661 Str << "foldable producer:\n "; 865 Str << "foldable producer:\n";
662 for (const auto &Computation : KnownComputations) { 866 for (const auto &Computation : KnownComputations) {
663 Str << " "; 867 Str << " ";
664 Computation.second.Instr->dump(Func); 868 Computation.second.Instr->dump(Func);
665 Str << "\n"; 869 Str << "\n";
666 } 870 }
667 Str << "\n"; 871 Str << "\n";
668 } 872 }
669 873
670 private: 874 private:
671 class BoolComputationEntry { 875 class BoolComputationEntry {
672 public: 876 public:
673 explicit BoolComputationEntry(Inst *I) : Instr(I) {} 877 explicit BoolComputationEntry(Inst *I) : Instr(I) {}
674 Inst *const Instr; 878 Inst *const Instr;
675 // Boolean folding is disabled for variables whose live range is multi 879 // Boolean folding is disabled for variables whose live range is multi
676 // block. We conservatively initialize IsLiveOut to true, and set it to 880 // block. We conservatively initialize IsLiveOut to true, and set it to
677 // false once we find the end of the live range for the variable defined 881 // false once we find the end of the live range for the variable defined
678 // by this instruction. If liveness analysis is not performed (e.g., in 882 // by this instruction. If liveness analysis is not performed (e.g., in
679 // Om1 mode) IsLiveOut will never be set to false, and folding will be 883 // Om1 mode) IsLiveOut will never be set to false, and folding will be
680 // disabled. 884 // disabled.
681 bool IsLiveOut = true; 885 bool IsLiveOut = true;
886 int32_t NumUses = 0;
682 }; 887 };
683 888
684 using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>; 889 using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>;
685 BoolComputationMap KnownComputations; 890 BoolComputationMap KnownComputations;
686 }; 891 };
687 892
688 BoolComputationTracker BoolComputations; 893 BoolComputationTracker BoolComputations;
689 }; 894 };
690 895
691 class TargetDataARM32 final : public TargetDataLowering { 896 class TargetDataARM32 final : public TargetDataLowering {
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
727 932
728 private: 933 private:
729 ~TargetHeaderARM32() = default; 934 ~TargetHeaderARM32() = default;
730 935
731 TargetARM32Features CPUFeatures; 936 TargetARM32Features CPUFeatures;
732 }; 937 };
733 938
734 } // end of namespace Ice 939 } // end of namespace Ice
735 940
736 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H 941 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H
OLDNEW
« no previous file with comments | « src/IceTLS.h ('k') | src/IceTargetLoweringARM32.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698