| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringARM32 class, which consists almost | 10 // This file implements the TargetLoweringARM32 class, which consists almost |
| 11 // entirely of the lowering sequence for each high-level instruction. | 11 // entirely of the lowering sequence for each high-level instruction. |
| 12 // | 12 // |
| 13 //===----------------------------------------------------------------------===// | 13 //===----------------------------------------------------------------------===// |
| 14 | 14 |
| 15 #include "IceTargetLoweringARM32.h" | 15 #include "IceTargetLoweringARM32.h" |
| 16 | 16 |
| 17 #include "IceCfg.h" | 17 #include "IceCfg.h" |
| 18 #include "IceCfgNode.h" | 18 #include "IceCfgNode.h" |
| 19 #include "IceClFlags.h" | 19 #include "IceClFlags.h" |
| 20 #include "IceDefs.h" | 20 #include "IceDefs.h" |
| 21 #include "IceELFObjectWriter.h" | 21 #include "IceELFObjectWriter.h" |
| 22 #include "IceGlobalInits.h" | 22 #include "IceGlobalInits.h" |
| 23 #include "IceInstARM32.h" | 23 #include "IceInstARM32.h" |
| 24 #include "IceLiveness.h" | 24 #include "IceLiveness.h" |
| 25 #include "IceOperand.h" | 25 #include "IceOperand.h" |
| 26 #include "IceRegistersARM32.h" | 26 #include "IceRegistersARM32.h" |
| 27 #include "IceTargetLoweringARM32.def" | 27 #include "IceTargetLoweringARM32.def" |
| 28 #include "IceUtils.h" | 28 #include "IceUtils.h" |
| 29 |
| 30 #pragma clang diagnostic push |
| 31 #pragma clang diagnostic ignored "-Wunused-parameter" |
| 32 #pragma clang diagnostic ignored "-Wshadow" |
| 29 #include "llvm/Support/MathExtras.h" | 33 #include "llvm/Support/MathExtras.h" |
| 34 #pragma clang diagnostic pop |
| 30 | 35 |
| 31 namespace Ice { | 36 namespace Ice { |
| 32 | 37 |
| 33 namespace { | 38 namespace { |
| 34 | 39 |
| 35 void UnimplementedError(const ClFlags &Flags) { | 40 void UnimplementedError(const ClFlags &Flags) { |
| 36 if (!Flags.getSkipUnimplemented()) { | 41 if (!Flags.getSkipUnimplemented()) { |
| 37 // Use llvm_unreachable instead of report_fatal_error, which gives better | 42 // Use llvm_unreachable instead of report_fatal_error, which gives better |
| 38 // stack traces. | 43 // stack traces. |
| 39 llvm_unreachable("Not yet implemented"); | 44 llvm_unreachable("Not yet implemented"); |
| (...skipping 1147 matching lines...) | |
| 1187 // orr t_hi, t_hi, b.lo, lsr t_c2 | 1192 // orr t_hi, t_hi, b.lo, lsr t_c2 |
| 1188 // lsl t_lo, b.lo, c.lo | 1193 // lsl t_lo, b.lo, c.lo |
| 1189 // a.lo = t_lo | 1194 // a.lo = t_lo |
| 1190 // a.hi = t_hi | 1195 // a.hi = t_hi |
| 1191 // Can be strength-reduced for constant-shifts, but we don't do | 1196 // Can be strength-reduced for constant-shifts, but we don't do |
| 1192 // that for now. | 1197 // that for now. |
| 1193 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. | 1198 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. |
| 1194 // On ARM, shifts only take the lower 8 bits of the shift register, | 1199 // On ARM, shifts only take the lower 8 bits of the shift register, |
| 1195 // and saturate to the range 0-32, so the negative value will | 1200 // and saturate to the range 0-32, so the negative value will |
| 1196 // saturate to 32. | 1201 // saturate to 32. |
| 1197 Variable *T_Hi = makeReg(IceType_i32); | 1202 T_Hi = makeReg(IceType_i32); |
| 1198 Variable *Src1RLo = legalizeToVar(Src1Lo); | 1203 Variable *Src1RLo = legalizeToVar(Src1Lo); |
| 1199 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1204 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
| 1200 Variable *T_C1 = makeReg(IceType_i32); | 1205 Variable *T_C1 = makeReg(IceType_i32); |
| 1201 Variable *T_C2 = makeReg(IceType_i32); | 1206 Variable *T_C2 = makeReg(IceType_i32); |
| 1202 _sub(T_C1, Src1RLo, ThirtyTwo); | 1207 _sub(T_C1, Src1RLo, ThirtyTwo); |
| 1203 _lsl(T_Hi, Src0RHi, Src1RLo); | 1208 _lsl(T_Hi, Src0RHi, Src1RLo); |
| 1204 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1209 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| 1205 OperandARM32::LSL, T_C1)); | 1210 OperandARM32::LSL, T_C1)); |
| 1206 _rsb(T_C2, Src1RLo, ThirtyTwo); | 1211 _rsb(T_C2, Src1RLo, ThirtyTwo); |
| 1207 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1212 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| 1208 OperandARM32::LSR, T_C2)); | 1213 OperandARM32::LSR, T_C2)); |
| 1209 _mov(DestHi, T_Hi); | 1214 _mov(DestHi, T_Hi); |
| 1210 Variable *T_Lo = makeReg(IceType_i32); | 1215 T_Lo = makeReg(IceType_i32); |
| 1211 // _mov seems to sometimes have better register preferencing than lsl. | 1216 // _mov seems to sometimes have better register preferencing than lsl. |
| 1212 // Otherwise mov w/ lsl shifted register is a pseudo-instruction | 1217 // Otherwise mov w/ lsl shifted register is a pseudo-instruction |
| 1213 // that maps to lsl. | 1218 // that maps to lsl. |
| 1214 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1219 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| 1215 OperandARM32::LSL, Src1RLo)); | 1220 OperandARM32::LSL, Src1RLo)); |
| 1216 _mov(DestLo, T_Lo); | 1221 _mov(DestLo, T_Lo); |
| 1217 } break; | 1222 } break; |
| 1218 case InstArithmetic::Lshr: | 1223 case InstArithmetic::Lshr: |
| 1219 // a=b>>c (unsigned) ==> | 1224 // a=b>>c (unsigned) ==> |
| 1220 // GCC 4.8 does: | 1225 // GCC 4.8 does: |
| 1221 // rsb t_c1, c.lo, #32 | 1226 // rsb t_c1, c.lo, #32 |
| 1222 // lsr t_lo, b.lo, c.lo | 1227 // lsr t_lo, b.lo, c.lo |
| 1223 // orr t_lo, t_lo, b.hi, lsl t_c1 | 1228 // orr t_lo, t_lo, b.hi, lsl t_c1 |
| 1224 // sub t_c2, c.lo, #32 | 1229 // sub t_c2, c.lo, #32 |
| 1225 // orr t_lo, t_lo, b.hi, lsr t_c2 | 1230 // orr t_lo, t_lo, b.hi, lsr t_c2 |
| 1226 // lsr t_hi, b.hi, c.lo | 1231 // lsr t_hi, b.hi, c.lo |
| 1227 // a.lo = t_lo | 1232 // a.lo = t_lo |
| 1228 // a.hi = t_hi | 1233 // a.hi = t_hi |
| 1229 case InstArithmetic::Ashr: { | 1234 case InstArithmetic::Ashr: { |
| 1230 // a=b>>c (signed) ==> ... | 1235 // a=b>>c (signed) ==> ... |
| 1231 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, | 1236 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, |
| 1232 // and the next orr should be conditioned on PLUS. The last two | 1237 // and the next orr should be conditioned on PLUS. The last two |
| 1233 // right shifts should also be arithmetic. | 1238 // right shifts should also be arithmetic. |
| 1234 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; | 1239 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; |
| 1235 Variable *T_Lo = makeReg(IceType_i32); | 1240 T_Lo = makeReg(IceType_i32); |
| 1236 Variable *Src1RLo = legalizeToVar(Src1Lo); | 1241 Variable *Src1RLo = legalizeToVar(Src1Lo); |
| 1237 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1242 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
| 1238 Variable *T_C1 = makeReg(IceType_i32); | 1243 Variable *T_C1 = makeReg(IceType_i32); |
| 1239 Variable *T_C2 = makeReg(IceType_i32); | 1244 Variable *T_C2 = makeReg(IceType_i32); |
| 1240 _rsb(T_C1, Src1RLo, ThirtyTwo); | 1245 _rsb(T_C1, Src1RLo, ThirtyTwo); |
| 1241 _lsr(T_Lo, Src0RLo, Src1RLo); | 1246 _lsr(T_Lo, Src0RLo, Src1RLo); |
| 1242 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1247 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| 1243 OperandARM32::LSL, T_C1)); | 1248 OperandARM32::LSL, T_C1)); |
| 1244 OperandARM32::ShiftKind RShiftKind; | 1249 OperandARM32::ShiftKind RShiftKind; |
| 1245 CondARM32::Cond Pred; | 1250 CondARM32::Cond Pred; |
| 1246 if (IsAshr) { | 1251 if (IsAshr) { |
| 1247 _subs(T_C2, Src1RLo, ThirtyTwo); | 1252 _subs(T_C2, Src1RLo, ThirtyTwo); |
| 1248 RShiftKind = OperandARM32::ASR; | 1253 RShiftKind = OperandARM32::ASR; |
| 1249 Pred = CondARM32::PL; | 1254 Pred = CondARM32::PL; |
| 1250 } else { | 1255 } else { |
| 1251 _sub(T_C2, Src1RLo, ThirtyTwo); | 1256 _sub(T_C2, Src1RLo, ThirtyTwo); |
| 1252 RShiftKind = OperandARM32::LSR; | 1257 RShiftKind = OperandARM32::LSR; |
| 1253 Pred = CondARM32::AL; | 1258 Pred = CondARM32::AL; |
| 1254 } | 1259 } |
| 1255 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1260 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| 1256 RShiftKind, T_C2), | 1261 RShiftKind, T_C2), |
| 1257 Pred); | 1262 Pred); |
| 1258 _mov(DestLo, T_Lo); | 1263 _mov(DestLo, T_Lo); |
| 1259 Variable *T_Hi = makeReg(IceType_i32); | 1264 T_Hi = makeReg(IceType_i32); |
| 1260 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1265 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| 1261 RShiftKind, Src1RLo)); | 1266 RShiftKind, Src1RLo)); |
| 1262 _mov(DestHi, T_Hi); | 1267 _mov(DestHi, T_Hi); |
| 1263 } break; | 1268 } break; |
| 1264 case InstArithmetic::Udiv: | 1269 case InstArithmetic::Udiv: |
| 1265 case InstArithmetic::Sdiv: | 1270 case InstArithmetic::Sdiv: |
| 1266 case InstArithmetic::Urem: | 1271 case InstArithmetic::Urem: |
| 1267 case InstArithmetic::Srem: { | 1272 case InstArithmetic::Srem: { |
| 1268 // Check for divide by 0 (ARM normally doesn't trap, but we want it | 1273 // Check for divide by 0 (ARM normally doesn't trap, but we want it |
| 1269 // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized | 1274 // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized |
| (...skipping 335 matching lines...) | |
| 1605 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); | 1610 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); |
| 1606 Context.insert(NewCall); | 1611 Context.insert(NewCall); |
| 1607 if (ReturnRegHi) | 1612 if (ReturnRegHi) |
| 1608 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1613 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| 1609 | 1614 |
| 1610 // Add the appropriate offset to SP. The call instruction takes care | 1615 // Add the appropriate offset to SP. The call instruction takes care |
| 1611 // of resetting the stack offset during emission. | 1616 // of resetting the stack offset during emission. |
| 1612 if (ParameterAreaSizeBytes) { | 1617 if (ParameterAreaSizeBytes) { |
| 1613 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), | 1618 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
| 1614 Legal_Reg | Legal_Flex); | 1619 Legal_Reg | Legal_Flex); |
| 1615 Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp); | |
| 1616 _add(SP, SP, AddAmount); | 1620 _add(SP, SP, AddAmount); |
| 1617 } | 1621 } |
| 1618 | 1622 |
| 1619 // Insert a register-kill pseudo instruction. | 1623 // Insert a register-kill pseudo instruction. |
| 1620 Context.insert(InstFakeKill::create(Func, NewCall)); | 1624 Context.insert(InstFakeKill::create(Func, NewCall)); |
| 1621 | 1625 |
| 1622 // Generate a FakeUse to keep the call live if necessary. | 1626 // Generate a FakeUse to keep the call live if necessary. |
| 1623 if (Instr->hasSideEffects() && ReturnReg) { | 1627 if (Instr->hasSideEffects() && ReturnReg) { |
| 1624 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | 1628 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| 1625 Context.insert(FakeUse); | 1629 Context.insert(FakeUse); |
| (...skipping 125 matching lines...) | |
| 1751 Variable *T = makeReg(Dest->getType()); | 1755 Variable *T = makeReg(Dest->getType()); |
| 1752 _uxt(T, Src0R); | 1756 _uxt(T, Src0R); |
| 1753 _mov(Dest, T); | 1757 _mov(Dest, T); |
| 1754 } | 1758 } |
| 1755 break; | 1759 break; |
| 1756 } | 1760 } |
| 1757 case InstCast::Trunc: { | 1761 case InstCast::Trunc: { |
| 1758 if (isVectorType(Dest->getType())) { | 1762 if (isVectorType(Dest->getType())) { |
| 1759 UnimplementedError(Func->getContext()->getFlags()); | 1763 UnimplementedError(Func->getContext()->getFlags()); |
| 1760 } else { | 1764 } else { |
| 1761 Operand *Src0 = Inst->getSrc(0); | |
| 1762 if (Src0->getType() == IceType_i64) | 1765 if (Src0->getType() == IceType_i64) |
| 1763 Src0 = loOperand(Src0); | 1766 Src0 = loOperand(Src0); |
| 1764 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 1767 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
| 1765 // t1 = trunc Src0RF; Dest = t1 | 1768 // t1 = trunc Src0RF; Dest = t1 |
| 1766 Variable *T = makeReg(Dest->getType()); | 1769 Variable *T = makeReg(Dest->getType()); |
| 1767 _mov(T, Src0RF); | 1770 _mov(T, Src0RF); |
| 1768 if (Dest->getType() == IceType_i1) | 1771 if (Dest->getType() == IceType_i1) |
| 1769 _and(T, T, Ctx->getConstantInt1(1)); | 1772 _and(T, T, Ctx->getConstantInt1(1)); |
| 1770 _mov(Dest, T); | 1773 _mov(Dest, T); |
| 1771 } | 1774 } |
| (...skipping 13 matching lines...) | |
| 1785 UnimplementedError(Func->getContext()->getFlags()); | 1788 UnimplementedError(Func->getContext()->getFlags()); |
| 1786 break; | 1789 break; |
| 1787 case InstCast::Sitofp: | 1790 case InstCast::Sitofp: |
| 1788 UnimplementedError(Func->getContext()->getFlags()); | 1791 UnimplementedError(Func->getContext()->getFlags()); |
| 1789 break; | 1792 break; |
| 1790 case InstCast::Uitofp: { | 1793 case InstCast::Uitofp: { |
| 1791 UnimplementedError(Func->getContext()->getFlags()); | 1794 UnimplementedError(Func->getContext()->getFlags()); |
| 1792 break; | 1795 break; |
| 1793 } | 1796 } |
| 1794 case InstCast::Bitcast: { | 1797 case InstCast::Bitcast: { |
| 1795 Operand *Src0 = Inst->getSrc(0); | |
| 1796 if (Dest->getType() == Src0->getType()) { | 1798 if (Dest->getType() == Src0->getType()) { |
| 1797 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); | 1799 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); |
| 1798 lowerAssign(Assign); | 1800 lowerAssign(Assign); |
| 1799 return; | 1801 return; |
| 1800 } | 1802 } |
| 1801 UnimplementedError(Func->getContext()->getFlags()); | 1803 UnimplementedError(Func->getContext()->getFlags()); |
| 1802 break; | 1804 break; |
| 1803 } | 1805 } |
| 1804 } | 1806 } |
| 1805 } | 1807 } |
| (...skipping 784 matching lines...) | |
| 2590 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 2592 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
| 2591 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 2593 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
| 2592 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 2594 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
| 2593 } | 2595 } |
| 2594 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 2596 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
| 2595 // However, for compatibility with current NaCl LLVM, don't claim that. | 2597 // However, for compatibility with current NaCl LLVM, don't claim that. |
| 2596 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 2598 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
| 2597 } | 2599 } |
| 2598 | 2600 |
| 2599 } // end of namespace Ice | 2601 } // end of namespace Ice |
| OLD | NEW |