OLD | NEW |
1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// | 1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
92 } | 92 } |
93 SizeT getReservedTmpReg() const { return RegARM32::Reg_ip; } | 93 SizeT getReservedTmpReg() const { return RegARM32::Reg_ip; } |
94 | 94 |
95 size_t typeWidthInBytesOnStack(Type Ty) const override { | 95 size_t typeWidthInBytesOnStack(Type Ty) const override { |
96 // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16 | 96 // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16 |
97 // are rounded up to 4 bytes. | 97 // are rounded up to 4 bytes. |
98 return (typeWidthInBytes(Ty) + 3) & ~3; | 98 return (typeWidthInBytes(Ty) + 3) & ~3; |
99 } | 99 } |
100 uint32_t getStackAlignment() const override; | 100 uint32_t getStackAlignment() const override; |
101 void reserveFixedAllocaArea(size_t Size, size_t Align) override { | 101 void reserveFixedAllocaArea(size_t Size, size_t Align) override { |
102 // TODO(sehr,jpp): Implement fixed stack layout. | 102 FixedAllocaSizeBytes = Size; |
103 (void)Size; | 103 assert(llvm::isPowerOf2_32(Align)); |
104 (void)Align; | 104 FixedAllocaAlignBytes = Align; |
105 llvm::report_fatal_error("Not yet implemented"); | 105 PrologEmitsFixedAllocas = true; |
106 } | 106 } |
107 int32_t getFrameFixedAllocaOffset() const override { | 107 int32_t getFrameFixedAllocaOffset() const override { |
108 // TODO(sehr,jpp): Implement fixed stack layout. | 108 return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes); |
109 llvm::report_fatal_error("Not yet implemented"); | |
110 return 0; | |
111 } | 109 } |
| 110 uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; } |
112 | 111 |
113 bool shouldSplitToVariable64On32(Type Ty) const override { | 112 bool shouldSplitToVariable64On32(Type Ty) const override { |
114 return Ty == IceType_i64; | 113 return Ty == IceType_i64; |
115 } | 114 } |
116 | 115 |
117 // TODO(ascull): what size is best for ARM? | 116 // TODO(ascull): what size is best for ARM? |
118 SizeT getMinJumpTableSize() const override { return 3; } | 117 SizeT getMinJumpTableSize() const override { return 3; } |
119 void emitJumpTable(const Cfg *Func, | 118 void emitJumpTable(const Cfg *Func, |
120 const InstJumpTable *JumpTable) const override; | 119 const InstJumpTable *JumpTable) const override; |
121 | 120 |
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
243 void doAddressOptStore() override; | 242 void doAddressOptStore() override; |
244 void randomlyInsertNop(float Probability, | 243 void randomlyInsertNop(float Probability, |
245 RandomNumberGenerator &RNG) override; | 244 RandomNumberGenerator &RNG) override; |
246 | 245 |
247 OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty); | 246 OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty); |
248 | 247 |
249 Variable64On32 *makeI64RegPair(); | 248 Variable64On32 *makeI64RegPair(); |
250 Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister); | 249 Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister); |
251 static Type stackSlotType(); | 250 static Type stackSlotType(); |
252 Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister); | 251 Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister); |
253 void alignRegisterPow2(Variable *Reg, uint32_t Align); | 252 void alignRegisterPow2(Variable *Reg, uint32_t Align, |
| 253 int32_t TmpRegNum = Variable::NoRegister); |
254 | 254 |
255 /// Returns a vector in a register with the given constant entries. | 255 /// Returns a vector in a register with the given constant entries. |
256 Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister); | 256 Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister); |
257 | 257 |
258 void | 258 void |
259 makeRandomRegisterPermutation(llvm::SmallVectorImpl<int32_t> &Permutation, | 259 makeRandomRegisterPermutation(llvm::SmallVectorImpl<int32_t> &Permutation, |
260 const llvm::SmallBitVector &ExcludeRegisters, | 260 const llvm::SmallBitVector &ExcludeRegisters, |
261 uint64_t Salt) const override; | 261 uint64_t Salt) const override; |
262 | 262 |
263 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap; | 263 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap; |
(...skipping 540 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
804 } | 804 } |
805 void _vsqrt(Variable *Dest, Variable *Src, | 805 void _vsqrt(Variable *Dest, Variable *Src, |
806 CondARM32::Cond Pred = CondARM32::AL) { | 806 CondARM32::Cond Pred = CondARM32::AL) { |
807 Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred)); | 807 Context.insert(InstARM32Vsqrt::create(Func, Dest, Src, Pred)); |
808 } | 808 } |
809 void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) { | 809 void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) { |
810 Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1)); | 810 Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1)); |
811 } | 811 } |
812 | 812 |
813 // Iterates over the CFG and determines the maximum outgoing stack arguments | 813 // Iterates over the CFG and determines the maximum outgoing stack arguments |
814 // bytes. This information is later used during addProlog() do pre-allocate | 814 // bytes. This information is later used during addProlog() to pre-allocate |
815 // the outargs area. | 815 // the outargs area. |
816 // TODO(jpp): This could live in the Parser, if we provided a Target-specific | 816 // TODO(jpp): This could live in the Parser, if we provided a Target-specific |
817 // method that the Parser could call. | 817 // method that the Parser could call. |
818 void findMaxStackOutArgsSize(); | 818 void findMaxStackOutArgsSize(); |
819 | 819 |
820 /// Run a pass through stack variables and ensure that the offsets are legal. | 820 /// Run a pass through stack variables and ensure that the offsets are legal. |
821 /// If the offset is not legal, use a new base register that accounts for the | 821 /// If the offset is not legal, use a new base register that accounts for the |
822 /// offset, such that the addressing mode offset bits are now legal. | 822 /// offset, such that the addressing mode offset bits are now legal. |
823 void legalizeStackSlots(); | 823 void legalizeStackSlots(); |
824 /// Returns true if the given Offset can be represented in a ldr/str. | 824 /// Returns true if the given Offset can be represented in a ldr/str. |
(...skipping 20 matching lines...) Expand all Loading... |
845 /// Legalizes Mov if its Source (or Destination) is a spilled Variable. Moves | 845 /// Legalizes Mov if its Source (or Destination) is a spilled Variable. Moves |
846 /// to memory become store instructions, and moves from memory, loads. | 846 /// to memory become store instructions, and moves from memory, loads. |
847 void legalizeMov(InstARM32Mov *Mov, Variable *OrigBaseReg, | 847 void legalizeMov(InstARM32Mov *Mov, Variable *OrigBaseReg, |
848 Variable **NewBaseReg, int32_t *NewBaseOffset); | 848 Variable **NewBaseReg, int32_t *NewBaseOffset); |
849 | 849 |
850 TargetARM32Features CPUFeatures; | 850 TargetARM32Features CPUFeatures; |
851 bool UsesFramePointer = false; | 851 bool UsesFramePointer = false; |
852 bool NeedsStackAlignment = false; | 852 bool NeedsStackAlignment = false; |
853 bool MaybeLeafFunc = true; | 853 bool MaybeLeafFunc = true; |
854 size_t SpillAreaSizeBytes = 0; | 854 size_t SpillAreaSizeBytes = 0; |
| 855 size_t FixedAllocaSizeBytes = 0; |
| 856 size_t FixedAllocaAlignBytes = 0; |
| 857 bool PrologEmitsFixedAllocas = false; |
855 uint32_t MaxOutArgsSizeBytes = 0; | 858 uint32_t MaxOutArgsSizeBytes = 0; |
856 // TODO(jpp): std::array instead of array. | 859 // TODO(jpp): std::array instead of array. |
857 static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM]; | 860 static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM]; |
858 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; | 861 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; |
859 static llvm::SmallBitVector ScratchRegs; | 862 static llvm::SmallBitVector ScratchRegs; |
860 llvm::SmallBitVector RegsUsed; | 863 llvm::SmallBitVector RegsUsed; |
861 VarList PhysicalRegisters[IceType_NUM]; | 864 VarList PhysicalRegisters[IceType_NUM]; |
862 | 865 |
863 /// Helper class that understands the Calling Convention and register | 866 /// Helper class that understands the Calling Convention and register |
864 /// assignments. The first few integer type parameters can use r0-r3, | 867 /// assignments. The first few integer type parameters can use r0-r3, |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
963 // disabled. | 966 // disabled. |
964 bool IsLiveOut = true; | 967 bool IsLiveOut = true; |
965 int32_t NumUses = 0; | 968 int32_t NumUses = 0; |
966 }; | 969 }; |
967 | 970 |
968 using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>; | 971 using BoolComputationMap = std::unordered_map<SizeT, BoolComputationEntry>; |
969 BoolComputationMap KnownComputations; | 972 BoolComputationMap KnownComputations; |
970 }; | 973 }; |
971 | 974 |
972 BoolComputationTracker BoolComputations; | 975 BoolComputationTracker BoolComputations; |
| 976 |
| 977 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked |
| 978 // without specifying a physical register. This is needed for creating unbound |
| 979 // temporaries during Ice -> ARM lowering, but before register allocation. |
| 980 // This is a safeguard ensuring that, during the legalization post-passes, no |
| 981 // unbound temporaries are created. |
| 982 bool AllowTemporaryWithNoReg = true; |
| 983 // ForbidTemporaryWithoutReg is a RAII class that manages |
| 984 // AllowTemporaryWithNoReg. |
| 985 class ForbidTemporaryWithoutReg { |
| 986 ForbidTemporaryWithoutReg() = delete; |
| 987 ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg&) = delete; |
| 988 ForbidTemporaryWithoutReg &operator=(const ForbidTemporaryWithoutReg&) = delete; |
| 989 |
| 990 public: |
| 991 explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) { |
| 992 Target->AllowTemporaryWithNoReg = false; |
| 993 } |
| 994 ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; } |
| 995 |
| 996 private: |
| 997 TargetARM32 *const Target; |
| 998 }; |
973 }; | 999 }; |
974 | 1000 |
975 class TargetDataARM32 final : public TargetDataLowering { | 1001 class TargetDataARM32 final : public TargetDataLowering { |
976 TargetDataARM32() = delete; | 1002 TargetDataARM32() = delete; |
977 TargetDataARM32(const TargetDataARM32 &) = delete; | 1003 TargetDataARM32(const TargetDataARM32 &) = delete; |
978 TargetDataARM32 &operator=(const TargetDataARM32 &) = delete; | 1004 TargetDataARM32 &operator=(const TargetDataARM32 &) = delete; |
979 | 1005 |
980 public: | 1006 public: |
981 static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) { | 1007 static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) { |
982 return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx)); | 1008 return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx)); |
(...skipping 28 matching lines...) Expand all Loading... |
1011 | 1037 |
1012 private: | 1038 private: |
1013 ~TargetHeaderARM32() = default; | 1039 ~TargetHeaderARM32() = default; |
1014 | 1040 |
1015 TargetARM32Features CPUFeatures; | 1041 TargetARM32Features CPUFeatures; |
1016 }; | 1042 }; |
1017 | 1043 |
1018 } // end of namespace Ice | 1044 } // end of namespace Ice |
1019 | 1045 |
1020 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H | 1046 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H |
OLD | NEW |