| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| 11 /// This file implements the TargetLoweringARM32 class, which consists almost | 11 /// This file implements the TargetLoweringARM32 class, which consists almost |
| 12 /// entirely of the lowering sequence for each high-level instruction. | 12 /// entirely of the lowering sequence for each high-level instruction. |
| 13 /// | 13 /// |
| 14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
| 15 | 15 |
| 16 #include "IceTargetLoweringARM32.h" | 16 #include "IceTargetLoweringARM32.h" |
| 17 | 17 |
| 18 #include "IceCfg.h" | 18 #include "IceCfg.h" |
| 19 #include "IceCfgNode.h" | 19 #include "IceCfgNode.h" |
| 20 #include "IceClFlags.h" | 20 #include "IceClFlags.h" |
| 21 #include "IceDefs.h" | 21 #include "IceDefs.h" |
| 22 #include "IceELFObjectWriter.h" | 22 #include "IceELFObjectWriter.h" |
| 23 #include "IceGlobalInits.h" | 23 #include "IceGlobalInits.h" |
| 24 #include "IceInstARM32.h" | 24 #include "IceInstARM32.h" |
| 25 #include "IceLiveness.h" | 25 #include "IceLiveness.h" |
| 26 #include "IceOperand.h" | 26 #include "IceOperand.h" |
| 27 #include "IceRegistersARM32.h" | 27 #include "IceRegistersARM32.h" |
| 28 #include "IceTargetLoweringARM32.def" | 28 #include "IceTargetLoweringARM32.def" |
| 29 #include "IceUtils.h" | 29 #include "IceUtils.h" |
| 30 |
| 31 #pragma clang diagnostic push |
| 32 #pragma clang diagnostic ignored "-Wunused-parameter" |
| 33 #pragma clang diagnostic ignored "-Wshadow" |
| 30 #include "llvm/Support/MathExtras.h" | 34 #include "llvm/Support/MathExtras.h" |
| 35 #pragma clang diagnostic pop |
| 31 | 36 |
| 32 namespace Ice { | 37 namespace Ice { |
| 33 | 38 |
| 34 namespace { | 39 namespace { |
| 35 | 40 |
| 36 void UnimplementedError(const ClFlags &Flags) { | 41 void UnimplementedError(const ClFlags &Flags) { |
| 37 if (!Flags.getSkipUnimplemented()) { | 42 if (!Flags.getSkipUnimplemented()) { |
| 38 // Use llvm_unreachable instead of report_fatal_error, which gives better | 43 // Use llvm_unreachable instead of report_fatal_error, which gives better |
| 39 // stack traces. | 44 // stack traces. |
| 40 llvm_unreachable("Not yet implemented"); | 45 llvm_unreachable("Not yet implemented"); |
| (...skipping 1214 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1255 // orr t_hi, t_hi, b.lo, lsr t_c2 | 1260 // orr t_hi, t_hi, b.lo, lsr t_c2 |
| 1256 // lsl t_lo, b.lo, c.lo | 1261 // lsl t_lo, b.lo, c.lo |
| 1257 // a.lo = t_lo | 1262 // a.lo = t_lo |
| 1258 // a.hi = t_hi | 1263 // a.hi = t_hi |
| 1259 // Can be strength-reduced for constant-shifts, but we don't do | 1264 // Can be strength-reduced for constant-shifts, but we don't do |
| 1260 // that for now. | 1265 // that for now. |
| 1261 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. | 1266 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. |
| 1262 // On ARM, shifts only take the lower 8 bits of the shift register, | 1267 // On ARM, shifts only take the lower 8 bits of the shift register, |
| 1263 // and saturate to the range 0-32, so the negative value will | 1268 // and saturate to the range 0-32, so the negative value will |
| 1264 // saturate to 32. | 1269 // saturate to 32. |
| 1265 Variable *T_Hi = makeReg(IceType_i32); | |
| 1266 Variable *Src1RLo = legalizeToVar(Src1Lo); | 1270 Variable *Src1RLo = legalizeToVar(Src1Lo); |
| 1267 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1271 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
| 1268 Variable *T_C1 = makeReg(IceType_i32); | 1272 Variable *T_C1 = makeReg(IceType_i32); |
| 1269 Variable *T_C2 = makeReg(IceType_i32); | 1273 Variable *T_C2 = makeReg(IceType_i32); |
| 1270 _sub(T_C1, Src1RLo, ThirtyTwo); | 1274 _sub(T_C1, Src1RLo, ThirtyTwo); |
| 1271 _lsl(T_Hi, Src0RHi, Src1RLo); | 1275 _lsl(T_Hi, Src0RHi, Src1RLo); |
| 1272 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1276 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| 1273 OperandARM32::LSL, T_C1)); | 1277 OperandARM32::LSL, T_C1)); |
| 1274 _rsb(T_C2, Src1RLo, ThirtyTwo); | 1278 _rsb(T_C2, Src1RLo, ThirtyTwo); |
| 1275 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1279 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| 1276 OperandARM32::LSR, T_C2)); | 1280 OperandARM32::LSR, T_C2)); |
| 1277 _mov(DestHi, T_Hi); | 1281 _mov(DestHi, T_Hi); |
| 1278 Variable *T_Lo = makeReg(IceType_i32); | |
| 1279 // _mov seems to sometimes have better register preferencing than lsl. | 1282 // _mov seems to sometimes have better register preferencing than lsl. |
| 1280 // Otherwise mov w/ lsl shifted register is a pseudo-instruction | 1283 // Otherwise mov w/ lsl shifted register is a pseudo-instruction |
| 1281 // that maps to lsl. | 1284 // that maps to lsl. |
| 1282 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1285 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| 1283 OperandARM32::LSL, Src1RLo)); | 1286 OperandARM32::LSL, Src1RLo)); |
| 1284 _mov(DestLo, T_Lo); | 1287 _mov(DestLo, T_Lo); |
| 1285 return; | 1288 return; |
| 1286 } | 1289 } |
| 1287 case InstArithmetic::Lshr: | 1290 case InstArithmetic::Lshr: |
| 1288 // a=b>>c (unsigned) ==> | 1291 // a=b>>c (unsigned) ==> |
| 1289 // GCC 4.8 does: | 1292 // GCC 4.8 does: |
| 1290 // rsb t_c1, c.lo, #32 | 1293 // rsb t_c1, c.lo, #32 |
| 1291 // lsr t_lo, b.lo, c.lo | 1294 // lsr t_lo, b.lo, c.lo |
| 1292 // orr t_lo, t_lo, b.hi, lsl t_c1 | 1295 // orr t_lo, t_lo, b.hi, lsl t_c1 |
| 1293 // sub t_c2, c.lo, #32 | 1296 // sub t_c2, c.lo, #32 |
| 1294 // orr t_lo, t_lo, b.hi, lsr t_c2 | 1297 // orr t_lo, t_lo, b.hi, lsr t_c2 |
| 1295 // lsr t_hi, b.hi, c.lo | 1298 // lsr t_hi, b.hi, c.lo |
| 1296 // a.lo = t_lo | 1299 // a.lo = t_lo |
| 1297 // a.hi = t_hi | 1300 // a.hi = t_hi |
| 1298 case InstArithmetic::Ashr: { | 1301 case InstArithmetic::Ashr: { |
| 1299 // a=b>>c (signed) ==> ... | 1302 // a=b>>c (signed) ==> ... |
| 1300 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, | 1303 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, |
| 1301 // and the next orr should be conditioned on PLUS. The last two | 1304 // and the next orr should be conditioned on PLUS. The last two |
| 1302 // right shifts should also be arithmetic. | 1305 // right shifts should also be arithmetic. |
| 1303 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; | 1306 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; |
| 1304 Variable *T_Lo = makeReg(IceType_i32); | |
| 1305 Variable *Src1RLo = legalizeToVar(Src1Lo); | 1307 Variable *Src1RLo = legalizeToVar(Src1Lo); |
| 1306 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1308 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
| 1307 Variable *T_C1 = makeReg(IceType_i32); | 1309 Variable *T_C1 = makeReg(IceType_i32); |
| 1308 Variable *T_C2 = makeReg(IceType_i32); | 1310 Variable *T_C2 = makeReg(IceType_i32); |
| 1309 _rsb(T_C1, Src1RLo, ThirtyTwo); | 1311 _rsb(T_C1, Src1RLo, ThirtyTwo); |
| 1310 _lsr(T_Lo, Src0RLo, Src1RLo); | 1312 _lsr(T_Lo, Src0RLo, Src1RLo); |
| 1311 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1313 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| 1312 OperandARM32::LSL, T_C1)); | 1314 OperandARM32::LSL, T_C1)); |
| 1313 OperandARM32::ShiftKind RShiftKind; | 1315 OperandARM32::ShiftKind RShiftKind; |
| 1314 CondARM32::Cond Pred; | 1316 CondARM32::Cond Pred; |
| 1315 if (IsAshr) { | 1317 if (IsAshr) { |
| 1316 _subs(T_C2, Src1RLo, ThirtyTwo); | 1318 _subs(T_C2, Src1RLo, ThirtyTwo); |
| 1317 RShiftKind = OperandARM32::ASR; | 1319 RShiftKind = OperandARM32::ASR; |
| 1318 Pred = CondARM32::PL; | 1320 Pred = CondARM32::PL; |
| 1319 } else { | 1321 } else { |
| 1320 _sub(T_C2, Src1RLo, ThirtyTwo); | 1322 _sub(T_C2, Src1RLo, ThirtyTwo); |
| 1321 RShiftKind = OperandARM32::LSR; | 1323 RShiftKind = OperandARM32::LSR; |
| 1322 Pred = CondARM32::AL; | 1324 Pred = CondARM32::AL; |
| 1323 } | 1325 } |
| 1324 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1326 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| 1325 RShiftKind, T_C2), | 1327 RShiftKind, T_C2), |
| 1326 Pred); | 1328 Pred); |
| 1327 _mov(DestLo, T_Lo); | 1329 _mov(DestLo, T_Lo); |
| 1328 Variable *T_Hi = makeReg(IceType_i32); | |
| 1329 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1330 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| 1330 RShiftKind, Src1RLo)); | 1331 RShiftKind, Src1RLo)); |
| 1331 _mov(DestHi, T_Hi); | 1332 _mov(DestHi, T_Hi); |
| 1332 return; | 1333 return; |
| 1333 } | 1334 } |
| 1334 case InstArithmetic::Fadd: | 1335 case InstArithmetic::Fadd: |
| 1335 case InstArithmetic::Fsub: | 1336 case InstArithmetic::Fsub: |
| 1336 case InstArithmetic::Fmul: | 1337 case InstArithmetic::Fmul: |
| 1337 case InstArithmetic::Fdiv: | 1338 case InstArithmetic::Fdiv: |
| 1338 case InstArithmetic::Frem: | 1339 case InstArithmetic::Frem: |
| (...skipping 316 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1655 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); | 1656 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); |
| 1656 Context.insert(NewCall); | 1657 Context.insert(NewCall); |
| 1657 if (ReturnRegHi) | 1658 if (ReturnRegHi) |
| 1658 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1659 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| 1659 | 1660 |
| 1660 // Add the appropriate offset to SP. The call instruction takes care | 1661 // Add the appropriate offset to SP. The call instruction takes care |
| 1661 // of resetting the stack offset during emission. | 1662 // of resetting the stack offset during emission. |
| 1662 if (ParameterAreaSizeBytes) { | 1663 if (ParameterAreaSizeBytes) { |
| 1663 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), | 1664 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
| 1664 Legal_Reg | Legal_Flex); | 1665 Legal_Reg | Legal_Flex); |
| 1665 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | |
| 1666 _add(SP, SP, AddAmount); | 1666 _add(SP, SP, AddAmount); |
| 1667 } | 1667 } |
| 1668 | 1668 |
| 1669 // Insert a register-kill pseudo instruction. | 1669 // Insert a register-kill pseudo instruction. |
| 1670 Context.insert(InstFakeKill::create(Func, NewCall)); | 1670 Context.insert(InstFakeKill::create(Func, NewCall)); |
| 1671 | 1671 |
| 1672 // Generate a FakeUse to keep the call live if necessary. | 1672 // Generate a FakeUse to keep the call live if necessary. |
| 1673 if (Instr->hasSideEffects() && ReturnReg) { | 1673 if (Instr->hasSideEffects() && ReturnReg) { |
| 1674 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | 1674 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| 1675 Context.insert(FakeUse); | 1675 Context.insert(FakeUse); |
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1801 Variable *T = makeReg(Dest->getType()); | 1801 Variable *T = makeReg(Dest->getType()); |
| 1802 _uxt(T, Src0R); | 1802 _uxt(T, Src0R); |
| 1803 _mov(Dest, T); | 1803 _mov(Dest, T); |
| 1804 } | 1804 } |
| 1805 break; | 1805 break; |
| 1806 } | 1806 } |
| 1807 case InstCast::Trunc: { | 1807 case InstCast::Trunc: { |
| 1808 if (isVectorType(Dest->getType())) { | 1808 if (isVectorType(Dest->getType())) { |
| 1809 UnimplementedError(Func->getContext()->getFlags()); | 1809 UnimplementedError(Func->getContext()->getFlags()); |
| 1810 } else { | 1810 } else { |
| 1811 Operand *Src0 = Inst->getSrc(0); | |
| 1812 if (Src0->getType() == IceType_i64) | 1811 if (Src0->getType() == IceType_i64) |
| 1813 Src0 = loOperand(Src0); | 1812 Src0 = loOperand(Src0); |
| 1814 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 1813 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
| 1815 // t1 = trunc Src0RF; Dest = t1 | 1814 // t1 = trunc Src0RF; Dest = t1 |
| 1816 Variable *T = makeReg(Dest->getType()); | 1815 Variable *T = makeReg(Dest->getType()); |
| 1817 _mov(T, Src0RF); | 1816 _mov(T, Src0RF); |
| 1818 if (Dest->getType() == IceType_i1) | 1817 if (Dest->getType() == IceType_i1) |
| 1819 _and(T, T, Ctx->getConstantInt1(1)); | 1818 _and(T, T, Ctx->getConstantInt1(1)); |
| 1820 _mov(Dest, T); | 1819 _mov(Dest, T); |
| 1821 } | 1820 } |
| (...skipping 13 matching lines...) Expand all Loading... |
| 1835 UnimplementedError(Func->getContext()->getFlags()); | 1834 UnimplementedError(Func->getContext()->getFlags()); |
| 1836 break; | 1835 break; |
| 1837 case InstCast::Sitofp: | 1836 case InstCast::Sitofp: |
| 1838 UnimplementedError(Func->getContext()->getFlags()); | 1837 UnimplementedError(Func->getContext()->getFlags()); |
| 1839 break; | 1838 break; |
| 1840 case InstCast::Uitofp: { | 1839 case InstCast::Uitofp: { |
| 1841 UnimplementedError(Func->getContext()->getFlags()); | 1840 UnimplementedError(Func->getContext()->getFlags()); |
| 1842 break; | 1841 break; |
| 1843 } | 1842 } |
| 1844 case InstCast::Bitcast: { | 1843 case InstCast::Bitcast: { |
| 1845 Operand *Src0 = Inst->getSrc(0); | |
| 1846 if (Dest->getType() == Src0->getType()) { | 1844 if (Dest->getType() == Src0->getType()) { |
| 1847 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); | 1845 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); |
| 1848 lowerAssign(Assign); | 1846 lowerAssign(Assign); |
| 1849 return; | 1847 return; |
| 1850 } | 1848 } |
| 1851 UnimplementedError(Func->getContext()->getFlags()); | 1849 UnimplementedError(Func->getContext()->getFlags()); |
| 1852 break; | 1850 break; |
| 1853 } | 1851 } |
| 1854 } | 1852 } |
| 1855 } | 1853 } |
| (...skipping 890 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2746 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 2744 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
| 2747 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 2745 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
| 2748 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 2746 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
| 2749 } | 2747 } |
| 2750 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 2748 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
| 2751 // However, for compatibility with current NaCl LLVM, don't claim that. | 2749 // However, for compatibility with current NaCl LLVM, don't claim that. |
| 2752 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 2750 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
| 2753 } | 2751 } |
| 2754 | 2752 |
| 2755 } // end of namespace Ice | 2753 } // end of namespace Ice |
| OLD | NEW |