OLD | NEW |
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
11 /// This file implements the TargetLoweringARM32 class, which consists almost | 11 /// This file implements the TargetLoweringARM32 class, which consists almost |
12 /// entirely of the lowering sequence for each high-level instruction. | 12 /// entirely of the lowering sequence for each high-level instruction. |
13 /// | 13 /// |
14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
15 #include "IceTargetLoweringARM32.h" | 15 #include "IceTargetLoweringARM32.h" |
16 | 16 |
17 #include "IceCfg.h" | 17 #include "IceCfg.h" |
18 #include "IceCfgNode.h" | 18 #include "IceCfgNode.h" |
19 #include "IceClFlags.h" | 19 #include "IceClFlags.h" |
20 #include "IceDefs.h" | 20 #include "IceDefs.h" |
21 #include "IceELFObjectWriter.h" | 21 #include "IceELFObjectWriter.h" |
22 #include "IceGlobalInits.h" | 22 #include "IceGlobalInits.h" |
23 #include "IceInstARM32.def" | 23 #include "IceInstARM32.def" |
24 #include "IceInstARM32.h" | 24 #include "IceInstARM32.h" |
| 25 #include "IceInstVarIter.h" |
25 #include "IceLiveness.h" | 26 #include "IceLiveness.h" |
26 #include "IceOperand.h" | 27 #include "IceOperand.h" |
27 #include "IcePhiLoweringImpl.h" | 28 #include "IcePhiLoweringImpl.h" |
28 #include "IceRegistersARM32.h" | 29 #include "IceRegistersARM32.h" |
29 #include "IceTargetLoweringARM32.def" | 30 #include "IceTargetLoweringARM32.def" |
30 #include "IceUtils.h" | 31 #include "IceUtils.h" |
31 #include "llvm/Support/MathExtras.h" | 32 #include "llvm/Support/MathExtras.h" |
32 | 33 |
33 #include <algorithm> | 34 #include <algorithm> |
34 #include <utility> | 35 #include <utility> |
(...skipping 1761 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1796 _mov(Dest, SrcR); | 1797 _mov(Dest, SrcR); |
1797 } else if (isFloatingType(Dest->getType())) { | 1798 } else if (isFloatingType(Dest->getType())) { |
1798 Variable *SrcR = legalizeToReg(NewSrc); | 1799 Variable *SrcR = legalizeToReg(NewSrc); |
1799 _mov(Dest, SrcR); | 1800 _mov(Dest, SrcR); |
1800 } else { | 1801 } else { |
1801 _mov(Dest, NewSrc); | 1802 _mov(Dest, NewSrc); |
1802 } | 1803 } |
1803 } | 1804 } |
1804 } | 1805 } |
1805 | 1806 |
1806 void TargetARM32::lowerBr(const InstBr *Inst) { | 1807 void TargetARM32::lowerBr(const InstBr *Instr) { |
1807 if (Inst->isUnconditional()) { | 1808 if (Instr->isUnconditional()) { |
1808 _br(Inst->getTargetUnconditional()); | 1809 _br(Instr->getTargetUnconditional()); |
1809 return; | 1810 return; |
1810 } | 1811 } |
1811 Operand *Cond = Inst->getCondition(); | 1812 Operand *Cond = Instr->getCondition(); |
1812 // TODO(jvoung): Handle folding opportunities. | |
1813 | 1813 |
1814 Type Ty = Cond->getType(); | 1814 CondARM32::Cond BrCondTrue0 = CondARM32::NE; |
1815 Variable *Src0R = legalizeToReg(Cond); | 1815 CondARM32::Cond BrCondTrue1 = CondARM32::kNone; |
1816 assert(Ty == IceType_i1); | 1816 CondARM32::Cond BrCondFalse = CondARM32::kNone; |
1817 if (Ty != IceType_i32) | 1817 if (!_mov_i1_to_flags(Cond, &BrCondTrue0, &BrCondTrue1, &BrCondFalse)) { |
1818 _uxt(Src0R, Src0R); | 1818 // "Cond" was not folded. |
1819 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1819 Type Ty = Cond->getType(); |
1820 _cmp(Src0R, Zero); | 1820 Variable *Src0R = legalizeToReg(Cond); |
1821 _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE); | 1821 assert(Ty == IceType_i1); |
| 1822 if (Ty != IceType_i32) |
| 1823 _uxt(Src0R, Src0R); |
| 1824 Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| 1825 _cmp(Src0R, _0); |
| 1826 BrCondTrue0 = CondARM32::NE; |
| 1827 } |
| 1828 |
| 1829 if (BrCondTrue1 != CondARM32::kNone) { |
| 1830 _br(Instr->getTargetTrue(), BrCondTrue1); |
| 1831 } |
| 1832 |
| 1833 if (BrCondTrue0 == CondARM32::kNone) { |
| 1834 assert(BrCondTrue1 == CondARM32::kNone); |
| 1835 _br(Instr->getTargetFalse()); |
| 1836 return; |
| 1837 } |
| 1838 |
| 1839 if (BrCondTrue0 == CondARM32::AL) { |
| 1840 assert(BrCondTrue1 == CondARM32::kNone); |
| 1841 assert(BrCondFalse == CondARM32::kNone); |
| 1842 _br(Instr->getTargetTrue()); |
| 1843 return; |
| 1844 } |
| 1845 |
| 1846 _br(Instr->getTargetTrue(), Instr->getTargetFalse(), BrCondTrue0); |
1822 } | 1847 } |
1823 | 1848 |
1824 void TargetARM32::lowerCall(const InstCall *Instr) { | 1849 void TargetARM32::lowerCall(const InstCall *Instr) { |
1825 MaybeLeafFunc = false; | 1850 MaybeLeafFunc = false; |
1826 NeedsStackAlignment = true; | 1851 NeedsStackAlignment = true; |
1827 | 1852 |
1828 // Assign arguments to registers and stack. Also reserve stack. | 1853 // Assign arguments to registers and stack. Also reserve stack. |
1829 TargetARM32::CallingConv CC; | 1854 TargetARM32::CallingConv CC; |
1830 // Pair of Arg Operand -> GPR number assignments. | 1855 // Pair of Arg Operand -> GPR number assignments. |
1831 llvm::SmallVector<std::pair<Operand *, int32_t>, | 1856 llvm::SmallVector<std::pair<Operand *, int32_t>, |
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2043 UnimplementedError(Func->getContext()->getFlags()); | 2068 UnimplementedError(Func->getContext()->getFlags()); |
2044 } else if (Dest->getType() == IceType_i64) { | 2069 } else if (Dest->getType() == IceType_i64) { |
2045 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 | 2070 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 |
2046 Constant *ShiftAmt = Ctx->getConstantInt32(31); | 2071 Constant *ShiftAmt = Ctx->getConstantInt32(31); |
2047 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2072 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2048 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2073 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2049 Variable *T_Lo = makeReg(DestLo->getType()); | 2074 Variable *T_Lo = makeReg(DestLo->getType()); |
2050 if (Src0->getType() == IceType_i32) { | 2075 if (Src0->getType() == IceType_i32) { |
2051 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2076 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2052 _mov(T_Lo, Src0RF); | 2077 _mov(T_Lo, Src0RF); |
2053 } else if (Src0->getType() == IceType_i1) { | 2078 } else if (Src0->getType() != IceType_i1) { |
2054 Variable *Src0R = legalizeToReg(Src0); | |
2055 _lsl(T_Lo, Src0R, ShiftAmt); | |
2056 _asr(T_Lo, T_Lo, ShiftAmt); | |
2057 } else { | |
2058 Variable *Src0R = legalizeToReg(Src0); | 2079 Variable *Src0R = legalizeToReg(Src0); |
2059 _sxt(T_Lo, Src0R); | 2080 _sxt(T_Lo, Src0R); |
| 2081 } else { |
| 2082 CondARM32::Cond CondTrue0, CondTrue1, CondFalse; |
| 2083 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { |
| 2084 // Handle bool folding. |
| 2085 Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| 2086 Operand *_m1 = |
| 2087 legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex); |
| 2088 _cmov(T_Lo, _m1, CondTrue0, CondTrue1, _0, CondFalse); |
| 2089 } else { |
| 2090 Variable *Src0R = legalizeToReg(Src0); |
| 2091 _lsl(T_Lo, Src0R, ShiftAmt); |
| 2092 _asr(T_Lo, T_Lo, ShiftAmt); |
| 2093 } |
2060 } | 2094 } |
2061 _mov(DestLo, T_Lo); | 2095 _mov(DestLo, T_Lo); |
2062 Variable *T_Hi = makeReg(DestHi->getType()); | 2096 Variable *T_Hi = makeReg(DestHi->getType()); |
2063 if (Src0->getType() != IceType_i1) { | 2097 if (Src0->getType() != IceType_i1) { |
2064 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo, | 2098 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo, |
2065 OperandARM32::ASR, ShiftAmt)); | 2099 OperandARM32::ASR, ShiftAmt)); |
2066 } else { | 2100 } else { |
2067 // For i1, the asr instruction is already done above. | 2101 // For i1, the asr instruction is already done above. |
2068 _mov(T_Hi, T_Lo); | 2102 _mov(T_Hi, T_Lo); |
2069 } | 2103 } |
2070 _mov(DestHi, T_Hi); | 2104 _mov(DestHi, T_Hi); |
2071 } else if (Src0->getType() == IceType_i1) { | 2105 } else if (Src0->getType() != IceType_i1) { |
2072 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1. | |
2073 // lsl t1, src_reg, 31 | |
2074 // asr t1, t1, 31 | |
2075 // dst = t1 | |
2076 Variable *Src0R = legalizeToReg(Src0); | |
2077 Constant *ShiftAmt = Ctx->getConstantInt32(31); | |
2078 Variable *T = makeReg(Dest->getType()); | |
2079 _lsl(T, Src0R, ShiftAmt); | |
2080 _asr(T, T, ShiftAmt); | |
2081 _mov(Dest, T); | |
2082 } else { | |
2083 // t1 = sxt src; dst = t1 | 2106 // t1 = sxt src; dst = t1 |
2084 Variable *Src0R = legalizeToReg(Src0); | 2107 Variable *Src0R = legalizeToReg(Src0); |
2085 Variable *T = makeReg(Dest->getType()); | 2108 Variable *T = makeReg(Dest->getType()); |
2086 _sxt(T, Src0R); | 2109 _sxt(T, Src0R); |
2087 _mov(Dest, T); | 2110 _mov(Dest, T); |
| 2111 } else { |
| 2112 Variable *T = makeReg(Dest->getType()); |
| 2113 CondARM32::Cond CondTrue0, CondTrue1, CondFalse; |
| 2114 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { |
| 2115 // Handle bool folding. |
| 2116 Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| 2117 Operand *_m1 = |
| 2118 legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex); |
| 2119 _cmov(T, _m1, CondTrue0, CondTrue1, _0, CondFalse); |
| 2120 } else { |
| 2121 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1. |
| 2122 // lsl t1, src_reg, 31 |
| 2123 // asr t1, t1, 31 |
| 2124 // dst = t1 |
| 2125 Variable *Src0R = legalizeToReg(Src0); |
| 2126 Constant *ShiftAmt = Ctx->getConstantInt32(31); |
| 2127 _lsl(T, Src0R, ShiftAmt); |
| 2128 _asr(T, T, ShiftAmt); |
| 2129 } |
| 2130 _mov(Dest, T); |
2088 } | 2131 } |
2089 break; | 2132 break; |
2090 } | 2133 } |
2091 case InstCast::Zext: { | 2134 case InstCast::Zext: { |
2092 if (isVectorType(Dest->getType())) { | 2135 if (isVectorType(Dest->getType())) { |
2093 Variable *T = makeReg(Dest->getType()); | 2136 Variable *T = makeReg(Dest->getType()); |
2094 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); | 2137 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); |
2095 _mov(Dest, T); | 2138 _mov(Dest, T); |
2096 UnimplementedError(Func->getContext()->getFlags()); | 2139 UnimplementedError(Func->getContext()->getFlags()); |
2097 } else if (Dest->getType() == IceType_i64) { | 2140 } else if (Dest->getType() == IceType_i64) { |
2098 // t1=uxtb src; dst.lo=t1; dst.hi=0 | 2141 // t1=uxtb src; dst.lo=t1; dst.hi=0 |
2099 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2142 Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| 2143 Constant *_1 = Ctx->getConstantInt32(1); |
2100 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2144 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2101 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2145 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2102 Variable *T_Lo = makeReg(DestLo->getType()); | 2146 Variable *T_Lo = makeReg(DestLo->getType()); |
| 2147 |
| 2148 CondARM32::Cond CondTrue0, CondTrue1, CondFalse; |
| 2149 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { |
| 2150 // Handle folding opportunities. |
| 2151 Variable *T_Hi = makeReg(DestLo->getType()); |
| 2152 _mov(T_Hi, _0); |
| 2153 _mov(DestHi, T_Hi); |
| 2154 _cmov(T_Lo, _1, CondTrue0, CondTrue1, _0, CondFalse); |
| 2155 _mov(DestLo, T_Lo); |
| 2156 return; |
| 2157 } |
| 2158 |
2103 // i32 and i1 can just take up the whole register. i32 doesn't need uxt, | 2159 // i32 and i1 can just take up the whole register. i32 doesn't need uxt, |
2104 // while i1 will have an and mask later anyway. | 2160 // while i1 will have an and mask later anyway. |
2105 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { | 2161 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { |
2106 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2162 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2107 _mov(T_Lo, Src0RF); | 2163 _mov(T_Lo, Src0RF); |
2108 } else { | 2164 } else { |
2109 Variable *Src0R = legalizeToReg(Src0); | 2165 Variable *Src0R = legalizeToReg(Src0); |
2110 _uxt(T_Lo, Src0R); | 2166 _uxt(T_Lo, Src0R); |
2111 } | 2167 } |
2112 if (Src0->getType() == IceType_i1) { | 2168 if (Src0->getType() == IceType_i1) { |
2113 Constant *One = Ctx->getConstantInt32(1); | 2169 Constant *One = Ctx->getConstantInt32(1); |
2114 _and(T_Lo, T_Lo, One); | 2170 _and(T_Lo, T_Lo, One); |
2115 } | 2171 } |
2116 _mov(DestLo, T_Lo); | 2172 _mov(DestLo, T_Lo); |
2117 Variable *T_Hi = makeReg(DestLo->getType()); | 2173 Variable *T_Hi = makeReg(DestLo->getType()); |
2118 _mov(T_Hi, Zero); | 2174 _mov(T_Hi, _0); |
2119 _mov(DestHi, T_Hi); | 2175 _mov(DestHi, T_Hi); |
2120 } else if (Src0->getType() == IceType_i1) { | 2176 } else if (Src0->getType() == IceType_i1) { |
| 2177 Constant *_1 = Ctx->getConstantInt32(1); |
| 2178 Variable *T = makeReg(Dest->getType()); |
| 2179 |
| 2180 CondARM32::Cond CondTrue0, CondTrue1, CondFalse; |
| 2181 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { |
| 2182 // Handle folding opportunities. |
| 2183 Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| 2184 _cmov(T, _1, CondTrue0, CondTrue1, _0, CondFalse); |
| 2185 _mov(Dest, T); |
| 2186 return; |
| 2187 } |
| 2188 |
2121 // t = Src0; t &= 1; Dest = t | 2189 // t = Src0; t &= 1; Dest = t |
2122 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2190 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2123 Constant *One = Ctx->getConstantInt32(1); | |
2124 Variable *T = makeReg(Dest->getType()); | |
2125 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt | 2191 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt |
2126 // requires the source to be a register so could have required a _mov | 2192 // requires the source to be a register so could have required a _mov |
2127 // from legalize anyway. | 2193 // from legalize anyway. |
2128 _mov(T, Src0RF); | 2194 _mov(T, Src0RF); |
2129 _and(T, T, One); | 2195 _and(T, T, _1); |
2130 _mov(Dest, T); | 2196 _mov(Dest, T); |
2131 } else { | 2197 } else { |
2132 // t1 = uxt src; dst = t1 | 2198 // t1 = uxt src; dst = t1 |
2133 Variable *Src0R = legalizeToReg(Src0); | 2199 Variable *Src0R = legalizeToReg(Src0); |
2134 Variable *T = makeReg(Dest->getType()); | 2200 Variable *T = makeReg(Dest->getType()); |
2135 _uxt(T, Src0R); | 2201 _uxt(T, Src0R); |
2136 _mov(Dest, T); | 2202 _mov(Dest, T); |
2137 } | 2203 } |
2138 break; | 2204 break; |
2139 } | 2205 } |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2390 CondARM32::Cond CC1; | 2456 CondARM32::Cond CC1; |
2391 } TableFcmp[] = { | 2457 } TableFcmp[] = { |
2392 #define X(val, CC0, CC1) \ | 2458 #define X(val, CC0, CC1) \ |
2393 { CondARM32::CC0, CondARM32::CC1 } \ | 2459 { CondARM32::CC0, CondARM32::CC1 } \ |
2394 , | 2460 , |
2395 FCMPARM32_TABLE | 2461 FCMPARM32_TABLE |
2396 #undef X | 2462 #undef X |
2397 }; | 2463 }; |
2398 } // end of anonymous namespace | 2464 } // end of anonymous namespace |
2399 | 2465 |
2400 void TargetARM32::lowerFcmp(const InstFcmp *Inst) { | 2466 void TargetARM32::lowerFcmpCond(const InstFcmp *Instr, |
2401 Variable *Dest = Inst->getDest(); | 2467 CondARM32::Cond *CondIfTrue0, |
| 2468 CondARM32::Cond *CondIfTrue1, |
| 2469 CondARM32::Cond *CondIfFalse) { |
| 2470 InstFcmp::FCond Condition = Instr->getCondition(); |
| 2471 switch (Condition) { |
| 2472 case InstFcmp::False: |
| 2473 *CondIfFalse = CondARM32::AL; |
| 2474 *CondIfTrue0 = *CondIfTrue1 = CondARM32::kNone; |
| 2475 break; |
| 2476 case InstFcmp::True: |
| 2477 *CondIfFalse = *CondIfTrue1 = CondARM32::kNone; |
| 2478 *CondIfTrue0 = CondARM32::AL; |
| 2479 break; |
| 2480 default: { |
| 2481 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); |
| 2482 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); |
| 2483 _vcmp(Src0R, Src1R); |
| 2484 _vmrs(); |
| 2485 assert(Condition < llvm::array_lengthof(TableFcmp)); |
| 2486 *CondIfTrue0 = TableFcmp[Condition].CC0; |
| 2487 *CondIfTrue1 = TableFcmp[Condition].CC1; |
| 2488 *CondIfFalse = (*CondIfTrue1 != CondARM32::kNone) |
| 2489 ? CondARM32::AL |
| 2490 : InstARM32::getOppositeCondition(*CondIfTrue0); |
| 2491 } |
| 2492 } |
| 2493 } |
| 2494 |
| 2495 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { |
| 2496 Variable *Dest = Instr->getDest(); |
2402 if (isVectorType(Dest->getType())) { | 2497 if (isVectorType(Dest->getType())) { |
2403 Variable *T = makeReg(Dest->getType()); | 2498 Variable *T = makeReg(Dest->getType()); |
2404 Context.insert(InstFakeDef::create(Func, T)); | 2499 Context.insert(InstFakeDef::create(Func, T)); |
2405 _mov(Dest, T); | 2500 _mov(Dest, T); |
2406 UnimplementedError(Func->getContext()->getFlags()); | 2501 UnimplementedError(Func->getContext()->getFlags()); |
2407 return; | 2502 return; |
2408 } | 2503 } |
2409 | 2504 |
2410 Variable *Src0R = legalizeToReg(Inst->getSrc(0)); | |
2411 Variable *Src1R = legalizeToReg(Inst->getSrc(1)); | |
2412 Variable *T = makeReg(IceType_i32); | 2505 Variable *T = makeReg(IceType_i32); |
2413 _vcmp(Src0R, Src1R); | 2506 Operand *_1 = Ctx->getConstantInt32(1); |
2414 _mov(T, Ctx->getConstantZero(IceType_i32)); | 2507 Operand *_0 = Ctx->getConstantZero(IceType_i32); |
2415 _vmrs(); | 2508 |
2416 Operand *One = Ctx->getConstantInt32(1); | 2509 CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse; |
2417 InstFcmp::FCond Condition = Inst->getCondition(); | 2510 lowerFcmpCond(Instr, &CondIfTrue0, &CondIfTrue1, &CondIfFalse); |
2418 assert(Condition < llvm::array_lengthof(TableFcmp)); | 2511 |
2419 CondARM32::Cond CC0 = TableFcmp[Condition].CC0; | 2512 bool RedefineT = false; |
2420 CondARM32::Cond CC1 = TableFcmp[Condition].CC1; | 2513 if (CondIfFalse != CondARM32::kNone) { |
2421 if (CC0 != CondARM32::kNone) { | 2514 assert(!RedefineT); |
2422 _mov(T, One, CC0); | 2515 _mov(T, _0, CondIfFalse); |
2423 // If this mov is not a maybe mov, but an actual mov (i.e., CC0 == AL), we | 2516 RedefineT = true; |
2424 // don't want to _set_dest_redefined so that liveness + dead-code | |
2425 // elimination will get rid of the previous assignment (i.e., T = 0) above. | |
2426 // TODO(stichnot,jpp): We should be able to conditionally create the "T=0" | |
2427 // instruction based on CC0, instead of relying on DCE to remove it. | |
2428 if (CC0 != CondARM32::AL) | |
2429 _set_dest_redefined(); | |
2430 } | 2517 } |
2431 if (CC1 != CondARM32::kNone) { | 2518 |
2432 assert(CC0 != CondARM32::kNone); | 2519 if (CondIfTrue0 != CondARM32::kNone) { |
2433 assert(CC1 != CondARM32::AL); | 2520 if (RedefineT) { |
2434 _mov_redefined(T, One, CC1); | 2521 _mov_redefined(T, _1, CondIfTrue0); |
| 2522 } else { |
| 2523 _mov(T, _1, CondIfTrue0); |
| 2524 } |
| 2525 RedefineT = true; |
2435 } | 2526 } |
| 2527 |
| 2528 if (CondIfTrue1 != CondARM32::kNone) { |
| 2529 assert(RedefineT); |
| 2530 _mov_redefined(T, _1, CondIfTrue1); |
| 2531 } |
| 2532 |
2436 _mov(Dest, T); | 2533 _mov(Dest, T); |
2437 } | 2534 } |
2438 | 2535 |
2439 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { | 2536 void TargetARM32::lowerIcmpCond(const InstIcmp *Inst, |
2440 Variable *Dest = Inst->getDest(); | 2537 CondARM32::Cond *CondIfTrue, |
| 2538 CondARM32::Cond *CondIfFalse) { |
2441 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2539 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
2442 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 2540 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
2443 | 2541 |
2444 if (isVectorType(Dest->getType())) { | |
2445 Variable *T = makeReg(Dest->getType()); | |
2446 Context.insert(InstFakeDef::create(Func, T)); | |
2447 _mov(Dest, T); | |
2448 UnimplementedError(Func->getContext()->getFlags()); | |
2449 return; | |
2450 } | |
2451 | |
2452 // a=icmp cond, b, c ==> | 2542 // a=icmp cond, b, c ==> |
2453 // GCC does: | 2543 // GCC does: |
2454 // cmp b.hi, c.hi or cmp b.lo, c.lo | 2544 // cmp b.hi, c.hi or cmp b.lo, c.lo |
2455 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi | 2545 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
2456 // mov.<C1> t, #1 mov.<C1> t, #1 | 2546 // mov.<C1> t, #1 mov.<C1> t, #1 |
2457 // mov.<C2> t, #0 mov.<C2> t, #0 | 2547 // mov.<C2> t, #0 mov.<C2> t, #0 |
2458 // mov a, t mov a, t | 2548 // mov a, t mov a, t |
2459 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" | 2549 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" |
2460 // is used for signed compares. In some cases, b and c need to be swapped as | 2550 // is used for signed compares. In some cases, b and c need to be swapped as |
2461 // well. | 2551 // well. |
2462 // | 2552 // |
2463 // LLVM does: | 2553 // LLVM does: |
2464 // for EQ and NE: | 2554 // for EQ and NE: |
2465 // eor t1, b.hi, c.hi | 2555 // eor t1, b.hi, c.hi |
2466 // eor t2, b.lo, c.lo | 2556 // eor t2, b.lo, c.lo |
2467 // orrs t, t1, t2 | 2557 // orrs t, t1, t2 |
2468 // mov.<C> t, #1 | 2558 // mov.<C> t, #1 |
2469 // mov a, t | 2559 // mov a, t |
2470 // | 2560 // |
2471 // that's nice in that it's just as short but has fewer dependencies for | 2561 // that's nice in that it's just as short but has fewer dependencies for |
2472 // better ILP at the cost of more registers. | 2562 // better ILP at the cost of more registers. |
2473 // | 2563 // |
2474 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two | 2564 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two |
2475 // unconditional mov #0, two cmps, two conditional mov #1, and one | 2565 // unconditional mov #0, two cmps, two conditional mov #1, and one |
2476 // conditional reg mov. That has few dependencies for good ILP, but is a | 2566 // conditional reg mov. That has few dependencies for good ILP, but is a |
2477 // longer sequence. | 2567 // longer sequence. |
2478 // | 2568 // |
2479 // So, we are going with the GCC version since it's usually better (except | 2569 // So, we are going with the GCC version since it's usually better (except |
2480 // perhaps for eq/ne). We could revisit special-casing eq/ne later. | 2570 // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
2481 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2571 |
2482 Constant *One = Ctx->getConstantInt32(1); | |
2483 if (Src0->getType() == IceType_i64) { | 2572 if (Src0->getType() == IceType_i64) { |
2484 InstIcmp::ICond Conditon = Inst->getCondition(); | 2573 InstIcmp::ICond Conditon = Inst->getCondition(); |
2485 size_t Index = static_cast<size_t>(Conditon); | 2574 size_t Index = static_cast<size_t>(Conditon); |
2486 assert(Index < llvm::array_lengthof(TableIcmp64)); | 2575 assert(Index < llvm::array_lengthof(TableIcmp64)); |
2487 Variable *Src0Lo, *Src0Hi; | 2576 Variable *Src0Lo, *Src0Hi; |
2488 Operand *Src1LoRF, *Src1HiRF; | 2577 Operand *Src1LoRF, *Src1HiRF; |
2489 if (TableIcmp64[Index].Swapped) { | 2578 if (TableIcmp64[Index].Swapped) { |
2490 Src0Lo = legalizeToReg(loOperand(Src1)); | 2579 Src0Lo = legalizeToReg(loOperand(Src1)); |
2491 Src0Hi = legalizeToReg(hiOperand(Src1)); | 2580 Src0Hi = legalizeToReg(hiOperand(Src1)); |
2492 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 2581 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
2493 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 2582 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
2494 } else { | 2583 } else { |
2495 Src0Lo = legalizeToReg(loOperand(Src0)); | 2584 Src0Lo = legalizeToReg(loOperand(Src0)); |
2496 Src0Hi = legalizeToReg(hiOperand(Src0)); | 2585 Src0Hi = legalizeToReg(hiOperand(Src0)); |
2497 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | 2586 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
2498 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | 2587 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
2499 } | 2588 } |
2500 Variable *T = makeReg(IceType_i32); | |
2501 if (TableIcmp64[Index].IsSigned) { | 2589 if (TableIcmp64[Index].IsSigned) { |
2502 Variable *ScratchReg = makeReg(IceType_i32); | 2590 Variable *ScratchReg = makeReg(IceType_i32); |
2503 _cmp(Src0Lo, Src1LoRF); | 2591 _cmp(Src0Lo, Src1LoRF); |
2504 _sbcs(ScratchReg, Src0Hi, Src1HiRF); | 2592 _sbcs(ScratchReg, Src0Hi, Src1HiRF); |
2505 // ScratchReg isn't going to be used, but we need the side-effect of | 2593 // ScratchReg isn't going to be used, but we need the side-effect of |
2506 // setting flags from this operation. | 2594 // setting flags from this operation. |
2507 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 2595 Context.insert(InstFakeUse::create(Func, ScratchReg)); |
2508 } else { | 2596 } else { |
2509 _cmp(Src0Hi, Src1HiRF); | 2597 _cmp(Src0Hi, Src1HiRF); |
2510 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); | 2598 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); |
2511 } | 2599 } |
2512 _mov(T, One, TableIcmp64[Index].C1); | 2600 *CondIfTrue = TableIcmp64[Index].C1; |
2513 _mov_redefined(T, Zero, TableIcmp64[Index].C2); | 2601 *CondIfFalse = TableIcmp64[Index].C2; |
2514 _mov(Dest, T); | |
2515 return; | 2602 return; |
2516 } | 2603 } |
2517 | 2604 |
2518 // a=icmp cond b, c ==> | 2605 // a=icmp cond b, c ==> |
2519 // GCC does: | 2606 // GCC does: |
2520 // <u/s>xtb tb, b | 2607 // <u/s>xtb tb, b |
2521 // <u/s>xtb tc, c | 2608 // <u/s>xtb tc, c |
2522 // cmp tb, tc | 2609 // cmp tb, tc |
2523 // mov.C1 t, #0 | 2610 // mov.C1 t, #0 |
2524 // mov.C2 t, #1 | 2611 // mov.C2 t, #1 |
(...skipping 16 matching lines...) Expand all Loading... |
2541 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For | 2628 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For |
2542 // the unsigned case, for some reason it does similar to GCC and does a uxtb | 2629 // the unsigned case, for some reason it does similar to GCC and does a uxtb |
2543 // first. It's not clear to me why that special-casing is needed. | 2630 // first. It's not clear to me why that special-casing is needed. |
2544 // | 2631 // |
2545 // We'll go with the LLVM way for now, since it's shorter and has just as few | 2632 // We'll go with the LLVM way for now, since it's shorter and has just as few |
2546 // dependencies. | 2633 // dependencies. |
2547 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 2634 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
2548 assert(ShiftAmt >= 0); | 2635 assert(ShiftAmt >= 0); |
2549 Constant *ShiftConst = nullptr; | 2636 Constant *ShiftConst = nullptr; |
2550 Variable *Src0R = nullptr; | 2637 Variable *Src0R = nullptr; |
2551 Variable *T = makeReg(IceType_i32); | |
2552 if (ShiftAmt) { | 2638 if (ShiftAmt) { |
2553 ShiftConst = Ctx->getConstantInt32(ShiftAmt); | 2639 ShiftConst = Ctx->getConstantInt32(ShiftAmt); |
2554 Src0R = makeReg(IceType_i32); | 2640 Src0R = makeReg(IceType_i32); |
2555 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); | 2641 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); |
2556 } else { | 2642 } else { |
2557 Src0R = legalizeToReg(Src0); | 2643 Src0R = legalizeToReg(Src0); |
2558 } | 2644 } |
2559 _mov(T, Zero); | |
2560 if (ShiftAmt) { | 2645 if (ShiftAmt) { |
2561 Variable *Src1R = legalizeToReg(Src1); | 2646 Variable *Src1R = legalizeToReg(Src1); |
2562 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( | 2647 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( |
2563 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); | 2648 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); |
2564 _cmp(Src0R, Src1RShifted); | 2649 _cmp(Src0R, Src1RShifted); |
2565 } else { | 2650 } else { |
2566 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | 2651 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
2567 _cmp(Src0R, Src1RF); | 2652 _cmp(Src0R, Src1RF); |
2568 } | 2653 } |
2569 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); | 2654 *CondIfTrue = getIcmp32Mapping(Inst->getCondition()); |
| 2655 *CondIfFalse = InstARM32::getOppositeCondition(*CondIfTrue); |
| 2656 } |
| 2657 |
| 2658 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| 2659 Variable *Dest = Inst->getDest(); |
| 2660 |
| 2661 if (isVectorType(Dest->getType())) { |
| 2662 Variable *T = makeReg(Dest->getType()); |
| 2663 Context.insert(InstFakeDef::create(Func, T)); |
| 2664 _mov(Dest, T); |
| 2665 UnimplementedError(Func->getContext()->getFlags()); |
| 2666 return; |
| 2667 } |
| 2668 |
| 2669 Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| 2670 Constant *_1 = Ctx->getConstantInt32(1); |
| 2671 Variable *T = makeReg(IceType_i32); |
| 2672 |
| 2673 CondARM32::Cond CondIfTrue, CondIfFalse; |
| 2674 lowerIcmpCond(Inst, &CondIfTrue, &CondIfFalse); |
| 2675 |
| 2676 _mov(T, _0, CondIfFalse); |
| 2677 _mov_redefined(T, _1, CondIfTrue); |
2570 _mov(Dest, T); | 2678 _mov(Dest, T); |
| 2679 |
2571 return; | 2680 return; |
2572 } | 2681 } |
2573 | 2682 |
2574 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { | 2683 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { |
2575 (void)Inst; | 2684 (void)Inst; |
2576 UnimplementedError(Func->getContext()->getFlags()); | 2685 UnimplementedError(Func->getContext()->getFlags()); |
2577 } | 2686 } |
2578 | 2687 |
2579 namespace { | 2688 namespace { |
2580 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 2689 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
(...skipping 741 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3322 Operand *SrcF = Inst->getFalseOperand(); | 3431 Operand *SrcF = Inst->getFalseOperand(); |
3323 Operand *Condition = Inst->getCondition(); | 3432 Operand *Condition = Inst->getCondition(); |
3324 | 3433 |
3325 if (isVectorType(DestTy)) { | 3434 if (isVectorType(DestTy)) { |
3326 Variable *T = makeReg(DestTy); | 3435 Variable *T = makeReg(DestTy); |
3327 Context.insert(InstFakeDef::create(Func, T)); | 3436 Context.insert(InstFakeDef::create(Func, T)); |
3328 _mov(Dest, T); | 3437 _mov(Dest, T); |
3329 UnimplementedError(Func->getContext()->getFlags()); | 3438 UnimplementedError(Func->getContext()->getFlags()); |
3330 return; | 3439 return; |
3331 } | 3440 } |
3332 // TODO(jvoung): handle folding opportunities. | 3441 |
3333 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t | 3442 CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse; |
3334 Variable *CmpOpnd0 = legalizeToReg(Condition); | 3443 if (!_mov_i1_to_flags(Condition, &CondIfTrue0, &CondIfTrue1, &CondIfFalse)) { |
3335 Type CmpOpnd0Ty = CmpOpnd0->getType(); | 3444 // "Condition" was not folded. |
3336 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32); | 3445 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t |
3337 assert(CmpOpnd0Ty == IceType_i1); | 3446 Variable *CmpOpnd0 = legalizeToReg(Condition); |
3338 if (CmpOpnd0Ty != IceType_i32) | 3447 Type CmpOpnd0Ty = CmpOpnd0->getType(); |
3339 _uxt(CmpOpnd0, CmpOpnd0); | 3448 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32); |
3340 _cmp(CmpOpnd0, CmpOpnd1); | 3449 assert(CmpOpnd0Ty == IceType_i1); |
3341 static constexpr CondARM32::Cond Cond = CondARM32::NE; | 3450 if (CmpOpnd0Ty != IceType_i32) |
| 3451 _uxt(CmpOpnd0, CmpOpnd0); |
| 3452 _cmp(CmpOpnd0, CmpOpnd1); |
| 3453 CondIfTrue0 = CondARM32::NE; |
| 3454 CondIfTrue1 = CondARM32::kNone; |
| 3455 CondIfFalse = CondARM32::EQ; |
| 3456 } |
| 3457 |
3342 if (DestTy == IceType_i64) { | 3458 if (DestTy == IceType_i64) { |
3343 SrcT = legalizeUndef(SrcT); | 3459 SrcT = legalizeUndef(SrcT); |
3344 SrcF = legalizeUndef(SrcF); | 3460 SrcF = legalizeUndef(SrcF); |
3345 // Set the low portion. | 3461 // Set the low portion. |
3346 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3462 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3463 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); |
3347 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex); | 3464 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex); |
3348 Variable *TLo = makeReg(SrcFLo->getType()); | 3465 Variable *TLo = makeReg(SrcFLo->getType()); |
3349 _mov(TLo, SrcFLo); | 3466 bool RedefineTLo = false; |
3350 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); | 3467 if (CondIfFalse != CondARM32::kNone) { |
3351 _mov_redefined(TLo, SrcTLo, Cond); | 3468 _mov(TLo, SrcFLo, CondIfFalse); |
| 3469 RedefineTLo = true; |
| 3470 } |
| 3471 if (CondIfTrue0 != CondARM32::kNone) { |
| 3472 if (!RedefineTLo) |
| 3473 _mov(TLo, SrcTLo, CondIfTrue0); |
| 3474 else |
| 3475 _mov_redefined(TLo, SrcTLo, CondIfTrue0); |
| 3476 RedefineTLo = true; |
| 3477 } |
| 3478 if (CondIfTrue1 != CondARM32::kNone) { |
| 3479 assert(RedefineTLo); |
| 3480 _mov_redefined(TLo, SrcTLo, CondIfTrue1); |
| 3481 } |
3352 _mov(DestLo, TLo); | 3482 _mov(DestLo, TLo); |
| 3483 |
3353 // Set the high portion. | 3484 // Set the high portion. |
3354 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3485 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3486 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); |
3355 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex); | 3487 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex); |
3356 Variable *THi = makeReg(SrcFHi->getType()); | 3488 Variable *THi = makeReg(SrcFHi->getType()); |
3357 _mov(THi, SrcFHi); | 3489 bool RedefineTHi = false; |
3358 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); | 3490 if (CondIfFalse != CondARM32::kNone) { |
3359 _mov_redefined(THi, SrcTHi, Cond); | 3491 _mov(THi, SrcFHi, CondIfFalse); |
| 3492 RedefineTHi = true; |
| 3493 } |
| 3494 if (CondIfTrue0 != CondARM32::kNone) { |
| 3495 if (!RedefineTHi) |
| 3496 _mov(THi, SrcTHi, CondIfTrue0); |
| 3497 else |
| 3498 _mov_redefined(THi, SrcTHi, CondIfTrue0); |
| 3499 RedefineTHi = true; |
| 3500 } |
| 3501 if (CondIfTrue1 != CondARM32::kNone) { |
| 3502 assert(RedefineTHi); |
| 3503 _mov_redefined(THi, SrcTHi, CondIfTrue1); |
| 3504 } |
3360 _mov(DestHi, THi); | 3505 _mov(DestHi, THi); |
3361 return; | 3506 return; |
3362 } | 3507 } |
3363 | 3508 |
3364 if (isFloatingType(DestTy)) { | 3509 if (isFloatingType(DestTy)) { |
| 3510 SrcT = legalizeToReg(SrcT); |
| 3511 SrcF = legalizeToReg(SrcF); |
3365 Variable *T = makeReg(DestTy); | 3512 Variable *T = makeReg(DestTy); |
3366 SrcF = legalizeToReg(SrcF); | |
3367 assert(DestTy == SrcF->getType()); | 3513 assert(DestTy == SrcF->getType()); |
3368 _mov(T, SrcF); | 3514 bool RedefineT = false; |
3369 SrcT = legalizeToReg(SrcT); | 3515 if (CondIfFalse != CondARM32::kNone) { |
| 3516 _mov(T, SrcF, CondIfFalse); |
| 3517 RedefineT = true; |
| 3518 } |
| 3519 if (CondIfTrue0 != CondARM32::kNone) { |
| 3520 if (!RedefineT) |
| 3521 _mov(T, SrcT, CondIfTrue0); |
| 3522 else |
| 3523 _mov_redefined(T, SrcT, CondIfTrue0); |
| 3524 RedefineT = true; |
| 3525 } |
| 3526 if (CondIfTrue1 != CondARM32::kNone) { |
| 3527 assert(RedefineT); |
| 3528 _mov_redefined(T, SrcT, CondIfTrue1); |
| 3529 } |
3370 assert(DestTy == SrcT->getType()); | 3530 assert(DestTy == SrcT->getType()); |
3371 _mov(T, SrcT, Cond); | |
3372 _set_dest_redefined(); | |
3373 _mov(Dest, T); | 3531 _mov(Dest, T); |
3374 return; | 3532 return; |
3375 } | 3533 } |
3376 | 3534 |
| 3535 Variable *T = makeReg(SrcF->getType()); |
| 3536 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex); |
3377 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); | 3537 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); |
3378 Variable *T = makeReg(SrcF->getType()); | 3538 bool RedefineT = false; |
3379 _mov(T, SrcF); | 3539 if (CondIfFalse != CondARM32::kNone) { |
3380 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex); | 3540 _mov(T, SrcF, CondIfFalse); |
3381 _mov_redefined(T, SrcT, Cond); | 3541 RedefineT = true; |
| 3542 } |
| 3543 if (CondIfTrue0 != CondARM32::kNone) { |
| 3544 if (!RedefineT) |
| 3545 _mov(T, SrcT, CondIfTrue0); |
| 3546 else |
| 3547 _mov_redefined(T, SrcT, CondIfTrue0); |
| 3548 RedefineT = true; |
| 3549 } |
| 3550 if (CondIfTrue1 != CondARM32::kNone) { |
| 3551 assert(RedefineT); |
| 3552 _mov_redefined(T, SrcT, CondIfTrue1); |
| 3553 } |
3382 _mov(Dest, T); | 3554 _mov(Dest, T); |
3383 } | 3555 } |
3384 | 3556 |
3385 void TargetARM32::lowerStore(const InstStore *Inst) { | 3557 void TargetARM32::lowerStore(const InstStore *Inst) { |
3386 Operand *Value = Inst->getData(); | 3558 Operand *Value = Inst->getData(); |
3387 Operand *Addr = Inst->getAddr(); | 3559 Operand *Addr = Inst->getAddr(); |
3388 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); | 3560 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); |
3389 Type Ty = NewAddr->getType(); | 3561 Type Ty = NewAddr->getType(); |
3390 | 3562 |
3391 if (Ty == IceType_i64) { | 3563 if (Ty == IceType_i64) { |
(...skipping 387 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3779 | 3951 |
3780 void TargetARM32::emit(const ConstantDouble *C) const { | 3952 void TargetARM32::emit(const ConstantDouble *C) const { |
3781 (void)C; | 3953 (void)C; |
3782 UnimplementedError(Ctx->getFlags()); | 3954 UnimplementedError(Ctx->getFlags()); |
3783 } | 3955 } |
3784 | 3956 |
3785 void TargetARM32::emit(const ConstantUndef *) const { | 3957 void TargetARM32::emit(const ConstantUndef *) const { |
3786 llvm::report_fatal_error("undef value encountered by emitter."); | 3958 llvm::report_fatal_error("undef value encountered by emitter."); |
3787 } | 3959 } |
3788 | 3960 |
| 3961 void TargetARM32::lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue, |
| 3962 CondARM32::Cond *CondIfFalse) { |
| 3963 Operand *_1 = Ctx->getConstantInt32(1); |
| 3964 Variable *SrcR = |
| 3965 legalizeToReg(Src->getType() == IceType_i64 ? loOperand(Src) : Src); |
| 3966 _tst(SrcR, _1); |
| 3967 *CondIfTrue = CondARM32::NE; // NE <-> APSR.Z == 0 |
| 3968 *CondIfFalse = CondARM32::EQ; // EQ <-> APSR.Z == 1 |
| 3969 } |
| 3970 |
| 3971 bool TargetARM32::_mov_i1_to_flags(Operand *Boolean, |
| 3972 CondARM32::Cond *CondIfTrue0, |
| 3973 CondARM32::Cond *CondIfTrue1, |
| 3974 CondARM32::Cond *CondIfFalse) { |
| 3975 *CondIfTrue0 = CondARM32::kNone; |
| 3976 *CondIfTrue1 = CondARM32::kNone; |
| 3977 *CondIfFalse = CondARM32::AL; |
| 3978 bool FoldOK = false; |
| 3979 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { |
| 3980 if (const auto *IcmpProducer = llvm::dyn_cast<InstIcmp>(Producer)) { |
| 3981 lowerIcmpCond(IcmpProducer, CondIfTrue0, CondIfFalse); |
| 3982 FoldOK = true; |
| 3983 } else if (const auto *FcmpProducer = llvm::dyn_cast<InstFcmp>(Producer)) { |
| 3984 lowerFcmpCond(FcmpProducer, CondIfTrue0, CondIfTrue1, CondIfFalse); |
| 3985 FoldOK = true; |
| 3986 } else if (const auto *CastProducer = llvm::dyn_cast<InstCast>(Producer)) { |
| 3987 assert(CastProducer->getCastKind() == InstCast::Trunc); |
| 3988 lowerTruncToFlags(CastProducer->getSrc(0), CondIfTrue0, CondIfFalse); |
| 3989 FoldOK = true; |
| 3990 } |
| 3991 } |
| 3992 return FoldOK; |
| 3993 } |
| 3994 |
| 3995 namespace { |
| 3996 namespace BoolFolding { |
| 3997 bool shouldTrackProducer(const Inst &Instr) { |
| 3998 switch (static_cast<uint32_t>(Instr.getKind())) { |
| 3999 case Inst::Icmp: |
| 4000 return true; |
| 4001 case Inst::Fcmp: |
| 4002 return true; |
| 4003 } |
| 4004 if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) { |
| 4005 switch (static_cast<uint32_t>(Cast->getCastKind())) { |
| 4006 case InstCast::Trunc: |
| 4007 return true; |
| 4008 } |
| 4009 } |
| 4010 return false; |
| 4011 } |
| 4012 |
| 4013 bool isValidConsumer(const Inst &Instr) { |
| 4014 switch (static_cast<uint32_t>(Instr.getKind())) { |
| 4015 case Inst::Br: |
| 4016 return true; |
| 4017 case Inst::Select: |
| 4018 return !isVectorType(Instr.getDest()->getType()); |
| 4019 } |
| 4020 if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) { |
| 4021 switch (static_cast<uint32_t>(Cast->getCastKind())) { |
| 4022 case InstCast::Sext: |
| 4023 return !isVectorType(Instr.getDest()->getType()); |
| 4024 case InstCast::Zext: |
| 4025 return !isVectorType(Instr.getDest()->getType()); |
| 4026 } |
| 4027 } |
| 4028 return false; |
| 4029 } |
| 4030 } // end of namespace BoolFolding |
| 4031 } // end of anonymous namespace |
| 4032 |
| 4033 void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { |
| 4034 for (Inst &Instr : Node->getInsts()) { |
| 4035 // Check whether Instr is a valid producer. |
| 4036 Variable *Dest = Instr.getDest(); |
| 4037 if (!Instr.isDeleted() // only consider non-deleted instructions; and |
| 4038 && Dest // only instructions with an actual dest var; and |
| 4039 && Dest->getType() == IceType_i1 // only bool-type dest vars; and |
| 4040 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. |
| 4041 KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr)); |
| 4042 } |
| 4043 // Check each src variable against the map. |
| 4044 FOREACH_VAR_IN_INST(Var, Instr) { |
| 4045 SizeT VarNum = Var->getIndex(); |
| 4046 auto ComputationIter = KnownComputations.find(VarNum); |
| 4047 if (ComputationIter == KnownComputations.end()) { |
| 4048 continue; |
| 4049 } |
| 4050 |
| 4051 if (IndexOfVarOperandInInst(Var) != 0 || |
| 4052 !BoolFolding::isValidConsumer(Instr)) { |
| 4053 // All valid consumers use Var as the first source operand. |
| 4054 KnownComputations.erase(VarNum); |
| 4055 continue; |
| 4056 } |
| 4057 |
| 4058 if (Instr.isLastUse(Var)) { |
| 4059 ComputationIter->second.IsLiveOut = false; |
| 4060 } |
| 4061 } |
| 4062 } |
| 4063 |
| 4064 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); |
| 4065 Iter != End;) { |
| 4066 // Disable the folding if its dest may be live beyond this block. |
| 4067 if (Iter->second.IsLiveOut) { |
| 4068 Iter = KnownComputations.erase(Iter); |
| 4069 continue; |
| 4070 } |
| 4071 |
| 4072 // Mark as "dead" rather than outright deleting. This is so that other |
| 4073 // peephole style optimizations during or before lowering have access to |
| 4074 // this instruction in undeleted form. See for example |
| 4075 // tryOptimizedCmpxchgCmpBr(). |
| 4076 Iter->second.Instr->setDead(); |
| 4077 ++Iter; |
| 4078 } |
| 4079 } |
| 4080 |
3789 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx) | 4081 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx) |
3790 : TargetDataLowering(Ctx) {} | 4082 : TargetDataLowering(Ctx) {} |
3791 | 4083 |
3792 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars, | 4084 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars, |
3793 const IceString &SectionSuffix) { | 4085 const IceString &SectionSuffix) { |
3794 switch (Ctx->getFlags().getOutFileType()) { | 4086 switch (Ctx->getFlags().getOutFileType()) { |
3795 case FT_Elf: { | 4087 case FT_Elf: { |
3796 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | 4088 ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
3797 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix); | 4089 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix); |
3798 } break; | 4090 } break; |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3970 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 4262 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
3971 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 4263 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
3972 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 4264 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
3973 } | 4265 } |
3974 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 4266 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
3975 // However, for compatibility with current NaCl LLVM, don't claim that. | 4267 // However, for compatibility with current NaCl LLVM, don't claim that. |
3976 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 4268 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
3977 } | 4269 } |
3978 | 4270 |
3979 } // end of namespace Ice | 4271 } // end of namespace Ice |
OLD | NEW |