Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(387)

Side by Side Diff: src/IceTargetLoweringARM32.h

Issue 1481133002: Subzero. ARM32. Show FP lowering some love. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// 1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 TargetARM32() = delete; 53 TargetARM32() = delete;
54 TargetARM32(const TargetARM32 &) = delete; 54 TargetARM32(const TargetARM32 &) = delete;
55 TargetARM32 &operator=(const TargetARM32 &) = delete; 55 TargetARM32 &operator=(const TargetARM32 &) = delete;
56 56
57 public: 57 public:
58 static void staticInit(); 58 static void staticInit();
59 // TODO(jvoung): return a unique_ptr. 59 // TODO(jvoung): return a unique_ptr.
60 static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); } 60 static TargetARM32 *create(Cfg *Func) { return new TargetARM32(Func); }
61 61
62 void initNodeForLowering(CfgNode *Node) override { 62 void initNodeForLowering(CfgNode *Node) override {
63 BoolComputations.forgetProducers(); 63 Computations.forgetProducers();
64 BoolComputations.recordProducers(Node); 64 Computations.recordProducers(Node);
65 BoolComputations.dump(Func); 65 Computations.dump(Func);
66 } 66 }
67 67
68 void translateOm1() override; 68 void translateOm1() override;
69 void translateO2() override; 69 void translateO2() override;
70 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; 70 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;
71 71
72 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; } 72 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
73 Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override; 73 Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override;
74 IceString getRegName(SizeT RegNum, Type Ty) const override; 74 IceString getRegName(SizeT RegNum, Type Ty) const override;
75 llvm::SmallBitVector getRegisterSet(RegSetMask Include, 75 llvm::SmallBitVector getRegisterSet(RegSetMask Include,
(...skipping 715 matching lines...) Expand 10 before | Expand all | Expand 10 after
791 void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero, 791 void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
792 CondARM32::Cond Pred = CondARM32::AL) { 792 CondARM32::Cond Pred = CondARM32::AL) {
793 Context.insert(InstARM32Vcmp::create(Func, Src0, FpZero, Pred)); 793 Context.insert(InstARM32Vcmp::create(Func, Src0, FpZero, Pred));
794 } 794 }
795 void _veor(Variable *Dest, Variable *Src0, Variable *Src1) { 795 void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
796 Context.insert(InstARM32Veor::create(Func, Dest, Src0, Src1)); 796 Context.insert(InstARM32Veor::create(Func, Dest, Src0, Src1));
797 } 797 }
798 void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { 798 void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
799 Context.insert(InstARM32Vmrs::create(Func, Pred)); 799 Context.insert(InstARM32Vmrs::create(Func, Pred));
800 } 800 }
801 void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
802 Context.insert(InstARM32Vmla::create(Func, Dest, Src0, Src1));
803 }
804 void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
805 Context.insert(InstARM32Vmls::create(Func, Dest, Src0, Src1));
806 }
801 void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { 807 void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
802 Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1)); 808 Context.insert(InstARM32Vmul::create(Func, Dest, Src0, Src1));
803 } 809 }
804 void _vsqrt(Variable *Dest, Variable *Src, 810 void _vsqrt(Variable *Dest, Variable *Src,
805 CondARM32::Cond Pred = CondARM32::AL) { 811 CondARM32::Cond Pred = CondARM32::AL) {
806 Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred)); 812 Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred));
807 } 813 }
808 void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) { 814 void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
809 Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1)); 815 Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1));
810 } 816 }
(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after
1012 size_t FixedAllocaSizeBytes = 0; 1018 size_t FixedAllocaSizeBytes = 0;
1013 size_t FixedAllocaAlignBytes = 0; 1019 size_t FixedAllocaAlignBytes = 0;
1014 bool PrologEmitsFixedAllocas = false; 1020 bool PrologEmitsFixedAllocas = false;
1015 uint32_t MaxOutArgsSizeBytes = 0; 1021 uint32_t MaxOutArgsSizeBytes = 0;
1016 // TODO(jpp): std::array instead of array. 1022 // TODO(jpp): std::array instead of array.
1017 static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM]; 1023 static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM];
1018 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; 1024 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
1019 static llvm::SmallBitVector ScratchRegs; 1025 static llvm::SmallBitVector ScratchRegs;
1020 llvm::SmallBitVector RegsUsed; 1026 llvm::SmallBitVector RegsUsed;
1021 VarList PhysicalRegisters[IceType_NUM]; 1027 VarList PhysicalRegisters[IceType_NUM];
1028 VarList PreservedGPRs;
1029 VarList PreservedSRegs;
1022 1030
1023 /// Helper class that understands the Calling Convention and register 1031 /// Helper class that understands the Calling Convention and register
1024 /// assignments. The first few integer type parameters can use r0-r3, 1032 /// assignments. The first few integer type parameters can use r0-r3,
1025 /// regardless of their position relative to the floating-point/vector 1033 /// regardless of their position relative to the floating-point/vector
1026 /// arguments in the argument list. Floating-point and vector arguments 1034 /// arguments in the argument list. Floating-point and vector arguments
1027 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic, 1035 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
1028 /// see the ARM Architecture Procedure Calling Standards (AAPCS). 1036 /// see the ARM Architecture Procedure Calling Standards (AAPCS).
1029 /// 1037 ///
1030 /// Technically, arguments that can start with registers but extend beyond the 1038 /// Technically, arguments that can start with registers but extend beyond the
1031 /// available registers can be split between the registers and the stack. 1039 /// available registers can be split between the registers and the stack.
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
1074 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt, 1082 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
1075 Operand *Base); 1083 Operand *Base);
1076 1084
1077 void postambleCtpop64(const InstCall *Instr); 1085 void postambleCtpop64(const InstCall *Instr);
1078 void preambleDivRem(const InstCall *Instr); 1086 void preambleDivRem(const InstCall *Instr);
1079 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)> 1087 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
1080 ARM32HelpersPreamble; 1088 ARM32HelpersPreamble;
1081 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)> 1089 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
1082 ARM32HelpersPostamble; 1090 ARM32HelpersPostamble;
1083 1091
1084 class BoolComputationTracker { 1092 class ComputationTracker {
1085 public: 1093 public:
1086 BoolComputationTracker() = default; 1094 ComputationTracker() = default;
1087 ~BoolComputationTracker() = default; 1095 ~ComputationTracker() = default;
1088 1096
1089 void forgetProducers() { KnownComputations.clear(); } 1097 void forgetProducers() { KnownComputations.clear(); }
1090 void recordProducers(CfgNode *Node); 1098 void recordProducers(CfgNode *Node);
1091 1099
1092 const Inst *getProducerOf(const Operand *Opnd) const { 1100 const Inst *getProducerOf(const Operand *Opnd) const {
1093 auto *Var = llvm::dyn_cast<Variable>(Opnd); 1101 auto *Var = llvm::dyn_cast<Variable>(Opnd);
1094 if (Var == nullptr) { 1102 if (Var == nullptr) {
1095 return nullptr; 1103 return nullptr;
1096 } 1104 }
1097 1105
(...skipping 13 matching lines...) Expand all
1111 Str << "foldable producer:\n"; 1119 Str << "foldable producer:\n";
1112 for (const auto &Computation : KnownComputations) { 1120 for (const auto &Computation : KnownComputations) {
1113 Str << " "; 1121 Str << " ";
1114 Computation.second.Instr->dump(Func); 1122 Computation.second.Instr->dump(Func);
1115 Str << "\n"; 1123 Str << "\n";
1116 } 1124 }
1117 Str << "\n"; 1125 Str << "\n";
1118 } 1126 }
1119 1127
1120 private: 1128 private:
1121 class BoolComputationEntry { 1129 class ComputationEntry {
1122 public: 1130 public:
1123 explicit BoolComputationEntry(Inst *I) : Instr(I) {} 1131 ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
1124 Inst *const Instr; 1132 Inst *const Instr;
1125 // Boolean folding is disabled for variables whose live range is multi 1133 // Boolean folding is disabled for variables whose live range is multi
1126 // block. We conservatively initialize IsLiveOut to true, and set it to 1134 // block. We conservatively initialize IsLiveOut to true, and set it to
1127 // false once we find the end of the live range for the variable defined 1135 // false once we find the end of the live range for the variable defined
1128 // by this instruction. If liveness analysis is not performed (e.g., in 1136 // by this instruction. If liveness analysis is not performed (e.g., in
1129 // Om1 mode) IsLiveOut will never be set to false, and folding will be 1137 // Om1 mode) IsLiveOut will never be set to false, and folding will be
1130 // disabled. 1138 // disabled.
1131 bool IsLiveOut = true; 1139 bool IsLiveOut = true;
1132 int32_t NumUses = 0; 1140 int32_t NumUses = 0;
1141 Type ComputationType;
1133 }; 1142 };
1134 1143
1135 using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>; 1144 using ComputationMap = std::unordered_map<SizeT, ComputationEntry>;
Jim Stichnoth 2015/12/07 20:58:14 Add a comment indicating the meaning of the SizeT
John 2015/12/08 13:54:25 Done -- maybe add a comment to X86 as well? Happy
Jim Stichnoth 2015/12/08 19:20:16 That would be great, thanks.
John 2015/12/09 13:11:08 The comment was already there. :)
1136 BoolComputationMap KnownComputations; 1145 ComputationMap KnownComputations;
1137 }; 1146 };
1138 1147
1139 BoolComputationTracker BoolComputations; 1148 ComputationTracker Computations;
1140 1149
1141 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked 1150 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
1142 // without specifying a physical register. This is needed for creating unbound 1151 // without specifying a physical register. This is needed for creating unbound
1143 // temporaries during Ice -> ARM lowering, but before register allocation. 1152 // temporaries during Ice -> ARM lowering, but before register allocation.
1144 // This is a safeguard that no unbound temporaries are created during the 1153 // This is a safeguard that no unbound temporaries are created during the
1145 // legalization post-passes. 1154 // legalization post-passes.
1146 bool AllowTemporaryWithNoReg = true; 1155 bool AllowTemporaryWithNoReg = true;
1147 // ForbidTemporaryWithoutReg is a RAII class that manages 1156 // ForbidTemporaryWithoutReg is a RAII class that manages
1148 // AllowTemporaryWithNoReg. 1157 // AllowTemporaryWithNoReg.
1149 class ForbidTemporaryWithoutReg { 1158 class ForbidTemporaryWithoutReg {
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
1202 1211
1203 private: 1212 private:
1204 ~TargetHeaderARM32() = default; 1213 ~TargetHeaderARM32() = default;
1205 1214
1206 TargetARM32Features CPUFeatures; 1215 TargetARM32Features CPUFeatures;
1207 }; 1216 };
1208 1217
1209 } // end of namespace Ice 1218 } // end of namespace Ice
1210 1219
1211 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H 1220 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698