| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// | 1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 53 TargetARM32() = delete; | 53 TargetARM32() = delete; |
| 54 TargetARM32(const TargetARM32 &) = delete; | 54 TargetARM32(const TargetARM32 &) = delete; |
| 55 TargetARM32 &operator=(const TargetARM32 &) = delete; | 55 TargetARM32 &operator=(const TargetARM32 &) = delete; |
| 56 | 56 |
| 57 public: | 57 public: |
| 58 static void staticInit(); | 58 static void staticInit(); |
| 59 // TODO(jvoung): return a unique_ptr. | 59 // TODO(jvoung): return a unique_ptr. |
| 60 static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); } | 60 static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); } |
| 61 | 61 |
| 62 void initNodeForLowering(CfgNode *Node) override { | 62 void initNodeForLowering(CfgNode *Node) override { |
| 63 BoolComputations.forgetProducers(); | 63 Computations.forgetProducers(); |
| 64 BoolComputations.recordProducers(Node); | 64 Computations.recordProducers(Node); |
| 65 BoolComputations.dump(Func); | 65 Computations.dump(Func); |
| 66 } | 66 } |
| 67 | 67 |
| 68 void translateOm1() override; | 68 void translateOm1() override; |
| 69 void translateO2() override; | 69 void translateO2() override; |
| 70 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; | 70 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; |
| 71 | 71 |
| 72 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; } | 72 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; } |
| 73 Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override; | 73 Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override; |
| 74 IceString getRegName(SizeT RegNum, Type Ty) const override; | 74 IceString getRegName(SizeT RegNum, Type Ty) const override; |
| 75 llvm::SmallBitVector getRegisterSet(RegSetMask Include, | 75 llvm::SmallBitVector getRegisterSet(RegSetMask Include, |
| (...skipping 715 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 791 void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero, | 791 void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero, |
| 792 CondARM32::Cond Pred = CondARM32::AL) { | 792 CondARM32::Cond Pred = CondARM32::AL) { |
| 793 Context.insert(InstARM32Vcmp::create(Func, Src0, FpZero, Pred)); | 793 Context.insert(InstARM32Vcmp::create(Func, Src0, FpZero, Pred)); |
| 794 } | 794 } |
| 795 void _veor(Variable *Dest, Variable *Src0, Variable *Src1) { | 795 void _veor(Variable *Dest, Variable *Src0, Variable *Src1) { |
| 796 Context.insert(InstARM32Veor::create(Func, Dest, Src0, Src1)); | 796 Context.insert(InstARM32Veor::create(Func, Dest, Src0, Src1)); |
| 797 } | 797 } |
| 798 void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { | 798 void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { |
| 799 Context.insert(InstARM32Vmrs::create(Func, Pred)); | 799 Context.insert(InstARM32Vmrs::create(Func, Pred)); |
| 800 } | 800 } |
| 801 void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) { |
| 802 Context.insert(InstARM32Vmla::create(Func, Dest, Src0, Src1)); |
| 803 } |
| 804 void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) { |
| 805 Context.insert(InstARM32Vmls::create(Func, Dest, Src0, Src1)); |
| 806 } |
| 801 void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { | 807 void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { |
| 802 Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1)); | 808 Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1)); |
| 803 } | 809 } |
| 804 void _vsqrt(Variable *Dest, Variable *Src, | 810 void _vsqrt(Variable *Dest, Variable *Src, |
| 805 CondARM32::Cond Pred = CondARM32::AL) { | 811 CondARM32::Cond Pred = CondARM32::AL) { |
| 806 Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred)); | 812 Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred)); |
| 807 } | 813 } |
| 808 void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) { | 814 void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) { |
| 809 Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1)); | 815 Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1)); |
| 810 } | 816 } |
| (...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1012 size_t FixedAllocaSizeBytes = 0; | 1018 size_t FixedAllocaSizeBytes = 0; |
| 1013 size_t FixedAllocaAlignBytes = 0; | 1019 size_t FixedAllocaAlignBytes = 0; |
| 1014 bool PrologEmitsFixedAllocas = false; | 1020 bool PrologEmitsFixedAllocas = false; |
| 1015 uint32_t MaxOutArgsSizeBytes = 0; | 1021 uint32_t MaxOutArgsSizeBytes = 0; |
| 1016 // TODO(jpp): std::array instead of array. | 1022 // TODO(jpp): std::array instead of array. |
| 1017 static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM]; | 1023 static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM]; |
| 1018 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; | 1024 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; |
| 1019 static llvm::SmallBitVector ScratchRegs; | 1025 static llvm::SmallBitVector ScratchRegs; |
| 1020 llvm::SmallBitVector RegsUsed; | 1026 llvm::SmallBitVector RegsUsed; |
| 1021 VarList PhysicalRegisters[IceType_NUM]; | 1027 VarList PhysicalRegisters[IceType_NUM]; |
| 1028 VarList PreservedGPRs; |
| 1029 VarList PreservedSRegs; |
| 1022 | 1030 |
| 1023 /// Helper class that understands the Calling Convention and register | 1031 /// Helper class that understands the Calling Convention and register |
| 1024 /// assignments. The first few integer type parameters can use r0-r3, | 1032 /// assignments. The first few integer type parameters can use r0-r3, |
| 1025 /// regardless of their position relative to the floating-point/vector | 1033 /// regardless of their position relative to the floating-point/vector |
| 1026 /// arguments in the argument list. Floating-point and vector arguments | 1034 /// arguments in the argument list. Floating-point and vector arguments |
| 1027 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic, | 1035 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic, |
| 1028 /// see the ARM Architecture Procedure Calling Standards (AAPCS). | 1036 /// see the ARM Architecture Procedure Calling Standards (AAPCS). |
| 1029 /// | 1037 /// |
| 1030 /// Technically, arguments that can start with registers but extend beyond the | 1038 /// Technically, arguments that can start with registers but extend beyond the |
| 1031 /// available registers can be split between the registers and the stack. | 1039 /// available registers can be split between the registers and the stack. |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1074 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt, | 1082 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt, |
| 1075 Operand *Base); | 1083 Operand *Base); |
| 1076 | 1084 |
| 1077 void postambleCtpop64(const InstCall *Instr); | 1085 void postambleCtpop64(const InstCall *Instr); |
| 1078 void preambleDivRem(const InstCall *Instr); | 1086 void preambleDivRem(const InstCall *Instr); |
| 1079 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)> | 1087 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)> |
| 1080 ARM32HelpersPreamble; | 1088 ARM32HelpersPreamble; |
| 1081 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)> | 1089 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)> |
| 1082 ARM32HelpersPostamble; | 1090 ARM32HelpersPostamble; |
| 1083 | 1091 |
| 1084 class BoolComputationTracker { | 1092 class ComputationTracker { |
| 1085 public: | 1093 public: |
| 1086 BoolComputationTracker() = default; | 1094 ComputationTracker() = default; |
| 1087 ~BoolComputationTracker() = default; | 1095 ~ComputationTracker() = default; |
| 1088 | 1096 |
| 1089 void forgetProducers() { KnownComputations.clear(); } | 1097 void forgetProducers() { KnownComputations.clear(); } |
| 1090 void recordProducers(CfgNode *Node); | 1098 void recordProducers(CfgNode *Node); |
| 1091 | 1099 |
| 1092 const Inst *getProducerOf(const Operand *Opnd) const { | 1100 const Inst *getProducerOf(const Operand *Opnd) const { |
| 1093 auto *Var = llvm::dyn_cast<Variable>(Opnd); | 1101 auto *Var = llvm::dyn_cast<Variable>(Opnd); |
| 1094 if (Var == nullptr) { | 1102 if (Var == nullptr) { |
| 1095 return nullptr; | 1103 return nullptr; |
| 1096 } | 1104 } |
| 1097 | 1105 |
| (...skipping 13 matching lines...) Expand all Loading... |
| 1111 Str << "foldable producer:\n"; | 1119 Str << "foldable producer:\n"; |
| 1112 for (const auto &Computation : KnownComputations) { | 1120 for (const auto &Computation : KnownComputations) { |
| 1113 Str << " "; | 1121 Str << " "; |
| 1114 Computation.second.Instr->dump(Func); | 1122 Computation.second.Instr->dump(Func); |
| 1115 Str << "\n"; | 1123 Str << "\n"; |
| 1116 } | 1124 } |
| 1117 Str << "\n"; | 1125 Str << "\n"; |
| 1118 } | 1126 } |
| 1119 | 1127 |
| 1120 private: | 1128 private: |
| 1121 class BoolComputationEntry { | 1129 class ComputationEntry { |
| 1122 public: | 1130 public: |
| 1123 explicit BoolComputationEntry(Inst *I) : Instr(I) {} | 1131 ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {} |
| 1124 Inst *const Instr; | 1132 Inst *const Instr; |
| 1125 // Boolean folding is disabled for variables whose live range spans multiple | 1133 // Boolean folding is disabled for variables whose live range spans multiple |
| 1126 // blocks. We conservatively initialize IsLiveOut to true, and set it to | 1134 // blocks. We conservatively initialize IsLiveOut to true, and set it to |
| 1127 // false once we find the end of the live range for the variable defined | 1135 // false once we find the end of the live range for the variable defined |
| 1128 // by this instruction. If liveness analysis is not performed (e.g., in | 1136 // by this instruction. If liveness analysis is not performed (e.g., in |
| 1129 // Om1 mode) IsLiveOut will never be set to false, and folding will be | 1137 // Om1 mode) IsLiveOut will never be set to false, and folding will be |
| 1130 // disabled. | 1138 // disabled. |
| 1131 bool IsLiveOut = true; | 1139 bool IsLiveOut = true; |
| 1132 int32_t NumUses = 0; | 1140 int32_t NumUses = 0; |
| 1141 Type ComputationType; |
| 1133 }; | 1142 }; |
| 1134 | 1143 |
| 1135 using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>; | 1144 // ComputationMap maps a Variable number to a payload identifying which |
| 1136 BoolComputationMap KnownComputations; | 1145 // instruction defined it. |
| 1146 using ComputationMap = std::unordered_map<SizeT, ComputationEntry>; |
| 1147 ComputationMap KnownComputations; |
| 1137 }; | 1148 }; |
| 1138 | 1149 |
| 1139 BoolComputationTracker BoolComputations; | 1150 ComputationTracker Computations; |
| 1140 | 1151 |
| 1141 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked | 1152 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked |
| 1142 // without specifying a physical register. This is needed for creating unbound | 1153 // without specifying a physical register. This is needed for creating unbound |
| 1143 // temporaries during Ice -> ARM lowering, but before register allocation. | 1154 // temporaries during Ice -> ARM lowering, but before register allocation. |
| 1144 // This is a safeguard that no unbound temporaries are created during the | 1155 // This is a safeguard that no unbound temporaries are created during the |
| 1145 // legalization post-passes. | 1156 // legalization post-passes. |
| 1146 bool AllowTemporaryWithNoReg = true; | 1157 bool AllowTemporaryWithNoReg = true; |
| 1147 // ForbidTemporaryWithoutReg is a RAII class that manages | 1158 // ForbidTemporaryWithoutReg is a RAII class that manages |
| 1148 // AllowTemporaryWithNoReg. | 1159 // AllowTemporaryWithNoReg. |
| 1149 class ForbidTemporaryWithoutReg { | 1160 class ForbidTemporaryWithoutReg { |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1202 | 1213 |
| 1203 private: | 1214 private: |
| 1204 ~TargetHeaderARM32() = default; | 1215 ~TargetHeaderARM32() = default; |
| 1205 | 1216 |
| 1206 TargetARM32Features CPUFeatures; | 1217 TargetARM32Features CPUFeatures; |
| 1207 }; | 1218 }; |
| 1208 | 1219 |
| 1209 } // end of namespace Ice | 1220 } // end of namespace Ice |
| 1210 | 1221 |
| 1211 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H | 1222 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H |
| OLD | NEW |