OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
11 /// This file implements the TargetLoweringARM32 class, which consists almost | 11 /// This file implements the TargetLoweringARM32 class, which consists almost |
12 /// entirely of the lowering sequence for each high-level instruction. | 12 /// entirely of the lowering sequence for each high-level instruction. |
13 /// | 13 /// |
14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
15 #include "IceTargetLoweringARM32.h" | 15 #include "IceTargetLoweringARM32.h" |
16 | 16 |
17 #include "IceCfg.h" | 17 #include "IceCfg.h" |
18 #include "IceCfgNode.h" | 18 #include "IceCfgNode.h" |
19 #include "IceClFlags.h" | 19 #include "IceClFlags.h" |
20 #include "IceDefs.h" | 20 #include "IceDefs.h" |
21 #include "IceELFObjectWriter.h" | 21 #include "IceELFObjectWriter.h" |
22 #include "IceGlobalInits.h" | 22 #include "IceGlobalInits.h" |
23 #include "IceInstARM32.def" | 23 #include "IceInstARM32.def" |
24 #include "IceInstARM32.h" | 24 #include "IceInstARM32.h" |
25 #include "IceInstVarIter.h" | |
25 #include "IceLiveness.h" | 26 #include "IceLiveness.h" |
26 #include "IceOperand.h" | 27 #include "IceOperand.h" |
27 #include "IcePhiLoweringImpl.h" | 28 #include "IcePhiLoweringImpl.h" |
28 #include "IceRegistersARM32.h" | 29 #include "IceRegistersARM32.h" |
29 #include "IceTargetLoweringARM32.def" | 30 #include "IceTargetLoweringARM32.def" |
30 #include "IceUtils.h" | 31 #include "IceUtils.h" |
31 #include "llvm/Support/MathExtras.h" | 32 #include "llvm/Support/MathExtras.h" |
32 | 33 |
33 #include <algorithm> | 34 #include <algorithm> |
34 #include <utility> | 35 #include <utility> |
(...skipping 1761 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1796 _mov(Dest, SrcR); | 1797 _mov(Dest, SrcR); |
1797 } else if (isFloatingType(Dest->getType())) { | 1798 } else if (isFloatingType(Dest->getType())) { |
1798 Variable *SrcR = legalizeToReg(NewSrc); | 1799 Variable *SrcR = legalizeToReg(NewSrc); |
1799 _mov(Dest, SrcR); | 1800 _mov(Dest, SrcR); |
1800 } else { | 1801 } else { |
1801 _mov(Dest, NewSrc); | 1802 _mov(Dest, NewSrc); |
1802 } | 1803 } |
1803 } | 1804 } |
1804 } | 1805 } |
1805 | 1806 |
1806 void TargetARM32::lowerBr(const InstBr *Inst) { | 1807 void TargetARM32::lowerBr(const InstBr *Instr) { |
1807 if (Inst->isUnconditional()) { | 1808 if (Instr->isUnconditional()) { |
1808 _br(Inst->getTargetUnconditional()); | 1809 _br(Instr->getTargetUnconditional()); |
1809 return; | 1810 return; |
1810 } | 1811 } |
1811 Operand *Cond = Inst->getCondition(); | 1812 Operand *Cond = Instr->getCondition(); |
1812 // TODO(jvoung): Handle folding opportunities. | |
1813 | 1813 |
1814 Type Ty = Cond->getType(); | 1814 CondARM32::Cond BrCondTrue0 = CondARM32::NE; |
1815 Variable *Src0R = legalizeToReg(Cond); | 1815 CondARM32::Cond BrCondTrue1 = CondARM32::kNone; |
1816 assert(Ty == IceType_i1); | 1816 CondARM32::Cond BrCondFalse = CondARM32::kNone; |
1817 if (Ty != IceType_i32) | 1817 if (!_mov_i1_to_flags(Cond, &BrCondTrue0, &BrCondTrue1, &BrCondFalse)) { |
1818 _uxt(Src0R, Src0R); | 1818 // "Cond" was not folded. |
1819 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1819 Type Ty = Cond->getType(); |
1820 _cmp(Src0R, Zero); | 1820 Variable *Src0R = legalizeToReg(Cond); |
1821 _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE); | 1821 assert(Ty == IceType_i1); |
1822 if (Ty != IceType_i32) | |
1823 _uxt(Src0R, Src0R); | |
1824 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
Jim Stichnoth
2015/11/03 20:04:28
Use "_0" for consistency?
John
2015/11/05 20:25:39
Done.
| |
1825 _cmp(Src0R, Zero); | |
1826 BrCondTrue0 = CondARM32::NE; | |
1827 } | |
1828 | |
1829 if (BrCondTrue1 != CondARM32::kNone) { | |
1830 _br(Instr->getTargetTrue(), BrCondTrue1); | |
1831 } | |
1832 | |
1833 if (BrCondTrue0 == CondARM32::kNone) { | |
1834 assert(BrCondTrue1 == CondARM32::kNone); | |
1835 _br(Instr->getTargetFalse()); | |
1836 return; | |
1837 } | |
1838 | |
1839 if (BrCondTrue0 == CondARM32::AL) { | |
1840 assert(BrCondTrue1 == CondARM32::kNone); | |
1841 assert(BrCondFalse == CondARM32::kNone); | |
1842 _br(Instr->getTargetTrue()); | |
1843 return; | |
1844 } | |
1845 | |
1846 _br(Instr->getTargetTrue(), Instr->getTargetFalse(), BrCondTrue0); | |
1822 } | 1847 } |
1823 | 1848 |
1824 void TargetARM32::lowerCall(const InstCall *Instr) { | 1849 void TargetARM32::lowerCall(const InstCall *Instr) { |
1825 MaybeLeafFunc = false; | 1850 MaybeLeafFunc = false; |
1826 NeedsStackAlignment = true; | 1851 NeedsStackAlignment = true; |
1827 | 1852 |
1828 // Assign arguments to registers and stack. Also reserve stack. | 1853 // Assign arguments to registers and stack. Also reserve stack. |
1829 TargetARM32::CallingConv CC; | 1854 TargetARM32::CallingConv CC; |
1830 // Pair of Arg Operand -> GPR number assignments. | 1855 // Pair of Arg Operand -> GPR number assignments. |
1831 llvm::SmallVector<std::pair<Operand *, int32_t>, | 1856 llvm::SmallVector<std::pair<Operand *, int32_t>, |
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2043 UnimplementedError(Func->getContext()->getFlags()); | 2068 UnimplementedError(Func->getContext()->getFlags()); |
2044 } else if (Dest->getType() == IceType_i64) { | 2069 } else if (Dest->getType() == IceType_i64) { |
2045 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 | 2070 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 |
2046 Constant *ShiftAmt = Ctx->getConstantInt32(31); | 2071 Constant *ShiftAmt = Ctx->getConstantInt32(31); |
2047 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2072 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2048 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2073 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2049 Variable *T_Lo = makeReg(DestLo->getType()); | 2074 Variable *T_Lo = makeReg(DestLo->getType()); |
2050 if (Src0->getType() == IceType_i32) { | 2075 if (Src0->getType() == IceType_i32) { |
2051 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2076 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2052 _mov(T_Lo, Src0RF); | 2077 _mov(T_Lo, Src0RF); |
2053 } else if (Src0->getType() == IceType_i1) { | 2078 } else if (Src0->getType() != IceType_i1) { |
2054 Variable *Src0R = legalizeToReg(Src0); | |
2055 _lsl(T_Lo, Src0R, ShiftAmt); | |
2056 _asr(T_Lo, T_Lo, ShiftAmt); | |
2057 } else { | |
2058 Variable *Src0R = legalizeToReg(Src0); | 2079 Variable *Src0R = legalizeToReg(Src0); |
2059 _sxt(T_Lo, Src0R); | 2080 _sxt(T_Lo, Src0R); |
2081 } else { | |
2082 CondARM32::Cond CondTrue0, CondTrue1, CondFalse; | |
2083 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { | |
2084 // Handle bool folding. | |
2085 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
2086 Operand *_m1 = | |
2087 legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex); | |
2088 _cmov(T_Lo, _m1, CondTrue0, CondTrue1, _0, CondFalse); | |
2089 } else { | |
2090 Variable *Src0R = legalizeToReg(Src0); | |
2091 _lsl(T_Lo, Src0R, ShiftAmt); | |
2092 _asr(T_Lo, T_Lo, ShiftAmt); | |
2093 } | |
2060 } | 2094 } |
2061 _mov(DestLo, T_Lo); | 2095 _mov(DestLo, T_Lo); |
2062 Variable *T_Hi = makeReg(DestHi->getType()); | 2096 Variable *T_Hi = makeReg(DestHi->getType()); |
2063 if (Src0->getType() != IceType_i1) { | 2097 if (Src0->getType() != IceType_i1) { |
2064 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo, | 2098 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo, |
2065 OperandARM32::ASR, ShiftAmt)); | 2099 OperandARM32::ASR, ShiftAmt)); |
2066 } else { | 2100 } else { |
2067 // For i1, the asr instruction is already done above. | 2101 // For i1, the asr instruction is already done above. |
2068 _mov(T_Hi, T_Lo); | 2102 _mov(T_Hi, T_Lo); |
2069 } | 2103 } |
2070 _mov(DestHi, T_Hi); | 2104 _mov(DestHi, T_Hi); |
2071 } else if (Src0->getType() == IceType_i1) { | 2105 } else if (Src0->getType() != IceType_i1) { |
2072 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1. | |
2073 // lsl t1, src_reg, 31 | |
2074 // asr t1, t1, 31 | |
2075 // dst = t1 | |
2076 Variable *Src0R = legalizeToReg(Src0); | |
2077 Constant *ShiftAmt = Ctx->getConstantInt32(31); | |
2078 Variable *T = makeReg(Dest->getType()); | |
2079 _lsl(T, Src0R, ShiftAmt); | |
2080 _asr(T, T, ShiftAmt); | |
2081 _mov(Dest, T); | |
2082 } else { | |
2083 // t1 = sxt src; dst = t1 | 2106 // t1 = sxt src; dst = t1 |
2084 Variable *Src0R = legalizeToReg(Src0); | 2107 Variable *Src0R = legalizeToReg(Src0); |
2085 Variable *T = makeReg(Dest->getType()); | 2108 Variable *T = makeReg(Dest->getType()); |
2086 _sxt(T, Src0R); | 2109 _sxt(T, Src0R); |
2087 _mov(Dest, T); | 2110 _mov(Dest, T); |
2111 } else { | |
2112 Variable *T = makeReg(Dest->getType()); | |
2113 CondARM32::Cond CondTrue0, CondTrue1, CondFalse; | |
2114 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { | |
2115 // Handle bool folding. | |
2116 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
2117 Operand *_m1 = | |
2118 legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex); | |
2119 _cmov(T, _m1, CondTrue0, CondTrue1, _0, CondFalse); | |
2120 } else { | |
2121 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1. | |
2122 // lsl t1, src_reg, 31 | |
2123 // asr t1, t1, 31 | |
2124 // dst = t1 | |
2125 Variable *Src0R = legalizeToReg(Src0); | |
2126 Constant *ShiftAmt = Ctx->getConstantInt32(31); | |
2127 _lsl(T, Src0R, ShiftAmt); | |
2128 _asr(T, T, ShiftAmt); | |
2129 } | |
2130 _mov(Dest, T); | |
2088 } | 2131 } |
2089 break; | 2132 break; |
2090 } | 2133 } |
2091 case InstCast::Zext: { | 2134 case InstCast::Zext: { |
2092 if (isVectorType(Dest->getType())) { | 2135 if (isVectorType(Dest->getType())) { |
2093 Variable *T = makeReg(Dest->getType()); | 2136 Variable *T = makeReg(Dest->getType()); |
2094 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); | 2137 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); |
2095 _mov(Dest, T); | 2138 _mov(Dest, T); |
2096 UnimplementedError(Func->getContext()->getFlags()); | 2139 UnimplementedError(Func->getContext()->getFlags()); |
2097 } else if (Dest->getType() == IceType_i64) { | 2140 } else if (Dest->getType() == IceType_i64) { |
2098 // t1=uxtb src; dst.lo=t1; dst.hi=0 | 2141 // t1=uxtb src; dst.lo=t1; dst.hi=0 |
2099 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2142 Constant *_0 = Ctx->getConstantZero(IceType_i32); |
2143 Constant *_1 = Ctx->getConstantInt32(1); | |
2100 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2144 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2101 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2145 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2102 Variable *T_Lo = makeReg(DestLo->getType()); | 2146 Variable *T_Lo = makeReg(DestLo->getType()); |
2147 | |
2148 CondARM32::Cond CondTrue0, CondTrue1, CondFalse; | |
2149 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { | |
2150 // Handle folding opportunities. | |
2151 Variable *T_Hi = makeReg(DestLo->getType()); | |
2152 _mov(T_Hi, _0); | |
2153 _mov(DestHi, T_Hi); | |
2154 _cmov(T_Lo, _1, CondTrue0, CondTrue1, _0, CondFalse); | |
2155 _mov(DestLo, T_Lo); | |
2156 return; | |
2157 } | |
2158 | |
2103 // i32 and i1 can just take up the whole register. i32 doesn't need uxt, | 2159 // i32 and i1 can just take up the whole register. i32 doesn't need uxt, |
2104 // while i1 will have an and mask later anyway. | 2160 // while i1 will have an and mask later anyway. |
2105 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { | 2161 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { |
2106 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2162 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2107 _mov(T_Lo, Src0RF); | 2163 _mov(T_Lo, Src0RF); |
2108 } else { | 2164 } else { |
2109 Variable *Src0R = legalizeToReg(Src0); | 2165 Variable *Src0R = legalizeToReg(Src0); |
2110 _uxt(T_Lo, Src0R); | 2166 _uxt(T_Lo, Src0R); |
2111 } | 2167 } |
2112 if (Src0->getType() == IceType_i1) { | 2168 if (Src0->getType() == IceType_i1) { |
2113 Constant *One = Ctx->getConstantInt32(1); | 2169 Constant *One = Ctx->getConstantInt32(1); |
2114 _and(T_Lo, T_Lo, One); | 2170 _and(T_Lo, T_Lo, One); |
2115 } | 2171 } |
2116 _mov(DestLo, T_Lo); | 2172 _mov(DestLo, T_Lo); |
2117 Variable *T_Hi = makeReg(DestLo->getType()); | 2173 Variable *T_Hi = makeReg(DestLo->getType()); |
2118 _mov(T_Hi, Zero); | 2174 _mov(T_Hi, _0); |
2119 _mov(DestHi, T_Hi); | 2175 _mov(DestHi, T_Hi); |
2120 } else if (Src0->getType() == IceType_i1) { | 2176 } else if (Src0->getType() == IceType_i1) { |
2177 Constant *_1 = Ctx->getConstantInt32(1); | |
2178 Variable *T = makeReg(Dest->getType()); | |
2179 | |
2180 CondARM32::Cond CondTrue0, CondTrue1, CondFalse; | |
2181 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { | |
2182 // Handle folding opportunities. | |
2183 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
2184 _cmov(T, _1, CondTrue0, CondTrue1, _0, CondFalse); | |
2185 _mov(Dest, T); | |
2186 return; | |
2187 } | |
2188 | |
2121 // t = Src0; t &= 1; Dest = t | 2189 // t = Src0; t &= 1; Dest = t |
2122 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2190 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2123 Constant *One = Ctx->getConstantInt32(1); | |
2124 Variable *T = makeReg(Dest->getType()); | |
2125 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt | 2191 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt |
2126 // requires the source to be a register so could have required a _mov | 2192 // requires the source to be a register so could have required a _mov |
2127 // from legalize anyway. | 2193 // from legalize anyway. |
2128 _mov(T, Src0RF); | 2194 _mov(T, Src0RF); |
2129 _and(T, T, One); | 2195 _and(T, T, _1); |
2130 _mov(Dest, T); | 2196 _mov(Dest, T); |
2131 } else { | 2197 } else { |
2132 // t1 = uxt src; dst = t1 | 2198 // t1 = uxt src; dst = t1 |
2133 Variable *Src0R = legalizeToReg(Src0); | 2199 Variable *Src0R = legalizeToReg(Src0); |
2134 Variable *T = makeReg(Dest->getType()); | 2200 Variable *T = makeReg(Dest->getType()); |
2135 _uxt(T, Src0R); | 2201 _uxt(T, Src0R); |
2136 _mov(Dest, T); | 2202 _mov(Dest, T); |
2137 } | 2203 } |
2138 break; | 2204 break; |
2139 } | 2205 } |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2390 CondARM32::Cond CC1; | 2456 CondARM32::Cond CC1; |
2391 } TableFcmp[] = { | 2457 } TableFcmp[] = { |
2392 #define X(val, CC0, CC1) \ | 2458 #define X(val, CC0, CC1) \ |
2393 { CondARM32::CC0, CondARM32::CC1 } \ | 2459 { CondARM32::CC0, CondARM32::CC1 } \ |
2394 , | 2460 , |
2395 FCMPARM32_TABLE | 2461 FCMPARM32_TABLE |
2396 #undef X | 2462 #undef X |
2397 }; | 2463 }; |
2398 } // end of anonymous namespace | 2464 } // end of anonymous namespace |
2399 | 2465 |
2400 void TargetARM32::lowerFcmp(const InstFcmp *Inst) { | 2466 void TargetARM32::lowerFcmpCond(const InstFcmp *Instr, |
2401 Variable *Dest = Inst->getDest(); | 2467 CondARM32::Cond *CondIfTrue0, |
2468 CondARM32::Cond *CondIfTrue1, | |
2469 CondARM32::Cond *CondIfFalse) { | |
2470 InstFcmp::FCond Condition = Instr->getCondition(); | |
2471 switch (Condition) { | |
2472 case InstFcmp::False: | |
2473 *CondIfFalse = CondARM32::AL; | |
2474 *CondIfTrue0 = *CondIfTrue1 = CondARM32::kNone; | |
2475 break; | |
2476 case InstFcmp::True: | |
2477 *CondIfFalse = *CondIfTrue1 = CondARM32::kNone; | |
2478 *CondIfTrue0 = CondARM32::AL; | |
2479 break; | |
2480 default: { | |
2481 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); | |
2482 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); | |
2483 _vcmp(Src0R, Src1R); | |
2484 _vmrs(); | |
2485 assert(Condition < llvm::array_lengthof(TableFcmp)); | |
2486 *CondIfTrue0 = TableFcmp[Condition].CC0; | |
2487 *CondIfTrue1 = TableFcmp[Condition].CC1; | |
2488 *CondIfFalse = (*CondIfTrue1 != CondARM32::kNone) | |
2489 ? CondARM32::AL | |
2490 : InstARM32::getOppositeCondition(*CondIfTrue0); | |
2491 } | |
2492 } | |
2493 } | |
2494 | |
2495 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { | |
2496 Variable *Dest = Instr->getDest(); | |
2402 if (isVectorType(Dest->getType())) { | 2497 if (isVectorType(Dest->getType())) { |
2403 Variable *T = makeReg(Dest->getType()); | 2498 Variable *T = makeReg(Dest->getType()); |
2404 Context.insert(InstFakeDef::create(Func, T)); | 2499 Context.insert(InstFakeDef::create(Func, T)); |
2405 _mov(Dest, T); | 2500 _mov(Dest, T); |
2406 UnimplementedError(Func->getContext()->getFlags()); | 2501 UnimplementedError(Func->getContext()->getFlags()); |
2407 return; | 2502 return; |
2408 } | 2503 } |
2409 | 2504 |
2410 Variable *Src0R = legalizeToReg(Inst->getSrc(0)); | |
2411 Variable *Src1R = legalizeToReg(Inst->getSrc(1)); | |
2412 Variable *T = makeReg(IceType_i32); | 2505 Variable *T = makeReg(IceType_i32); |
2413 _vcmp(Src0R, Src1R); | 2506 Operand *_1 = Ctx->getConstantInt32(1); |
2414 _mov(T, Ctx->getConstantZero(IceType_i32)); | 2507 Operand *_0 = Ctx->getConstantZero(IceType_i32); |
2415 _vmrs(); | 2508 |
2416 Operand *One = Ctx->getConstantInt32(1); | 2509 CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse; |
2417 InstFcmp::FCond Condition = Inst->getCondition(); | 2510 lowerFcmpCond(Instr, &CondIfTrue0, &CondIfTrue1, &CondIfFalse); |
2418 assert(Condition < llvm::array_lengthof(TableFcmp)); | 2511 |
2419 CondARM32::Cond CC0 = TableFcmp[Condition].CC0; | 2512 bool RedefineT = false; |
2420 CondARM32::Cond CC1 = TableFcmp[Condition].CC1; | 2513 if (CondIfFalse != CondARM32::kNone) { |
2421 if (CC0 != CondARM32::kNone) { | 2514 assert(!RedefineT); |
2422 _mov(T, One, CC0); | 2515 _mov(T, _0, CondIfFalse); |
2423 // If this mov is not a maybe mov, but an actual mov (i.e., CC0 == AL), we | 2516 RedefineT = true; |
2424 // don't want to _set_dest_redefined so that liveness + dead-code | |
2425 // elimination will get rid of the previous assignment (i.e., T = 0) above. | |
2426 // TODO(stichnot,jpp): We should be able to conditionally create the "T=0" | |
2427 // instruction based on CC0, instead of relying on DCE to remove it. | |
2428 if (CC0 != CondARM32::AL) | |
2429 _set_dest_redefined(); | |
2430 } | 2517 } |
2431 if (CC1 != CondARM32::kNone) { | 2518 |
2432 assert(CC0 != CondARM32::kNone); | 2519 if (CondIfTrue0 != CondARM32::kNone) { |
2433 assert(CC1 != CondARM32::AL); | 2520 if (RedefineT) { |
2434 _mov_redefined(T, One, CC1); | 2521 _mov_redefined(T, _1, CondIfTrue0); |
2522 } else { | |
2523 _mov(T, _1, CondIfTrue0); | |
2524 } | |
2525 RedefineT = true; | |
2435 } | 2526 } |
2527 | |
2528 if (CondIfTrue1 != CondARM32::kNone) { | |
2529 assert(RedefineT); | |
2530 _mov_redefined(T, _1, CondIfTrue1); | |
2531 } | |
2532 | |
2436 _mov(Dest, T); | 2533 _mov(Dest, T); |
2437 } | 2534 } |
2438 | 2535 |
2439 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { | 2536 void TargetARM32::lowerIcmpCond(const InstIcmp *Inst, |
2440 Variable *Dest = Inst->getDest(); | 2537 CondARM32::Cond *CondIfTrue, |
2538 CondARM32::Cond *CondIfFalse) { | |
2441 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2539 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
2442 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 2540 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
2443 | 2541 |
2444 if (isVectorType(Dest->getType())) { | |
2445 Variable *T = makeReg(Dest->getType()); | |
2446 Context.insert(InstFakeDef::create(Func, T)); | |
2447 _mov(Dest, T); | |
2448 UnimplementedError(Func->getContext()->getFlags()); | |
2449 return; | |
2450 } | |
2451 | |
2452 // a=icmp cond, b, c ==> | 2542 // a=icmp cond, b, c ==> |
2453 // GCC does: | 2543 // GCC does: |
2454 // cmp b.hi, c.hi or cmp b.lo, c.lo | 2544 // cmp b.hi, c.hi or cmp b.lo, c.lo |
2455 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi | 2545 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
2456 // mov.<C1> t, #1 mov.<C1> t, #1 | 2546 // mov.<C1> t, #1 mov.<C1> t, #1 |
2457 // mov.<C2> t, #0 mov.<C2> t, #0 | 2547 // mov.<C2> t, #0 mov.<C2> t, #0 |
2458 // mov a, t mov a, t | 2548 // mov a, t mov a, t |
2459 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" | 2549 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" |
2460 // is used for signed compares. In some cases, b and c need to be swapped as | 2550 // is used for signed compares. In some cases, b and c need to be swapped as |
2461 // well. | 2551 // well. |
2462 // | 2552 // |
2463 // LLVM does: | 2553 // LLVM does: |
2464 // for EQ and NE: | 2554 // for EQ and NE: |
2465 // eor t1, b.hi, c.hi | 2555 // eor t1, b.hi, c.hi |
2466 // eor t2, b.lo, c.lo | 2556 // eor t2, b.lo, c.lo |
2467 // orrs t, t1, t2 | 2557 // orrs t, t1, t2 |
2468 // mov.<C> t, #1 | 2558 // mov.<C> t, #1 |
2469 // mov a, t | 2559 // mov a, t |
2470 // | 2560 // |
2471 // that's nice in that it's just as short but has fewer dependencies for | 2561 // that's nice in that it's just as short but has fewer dependencies for |
2472 // better ILP at the cost of more registers. | 2562 // better ILP at the cost of more registers. |
2473 // | 2563 // |
2474 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two | 2564 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two |
2475 // unconditional mov #0, two cmps, two conditional mov #1, and one | 2565 // unconditional mov #0, two cmps, two conditional mov #1, and one |
2476 // conditional reg mov. That has few dependencies for good ILP, but is a | 2566 // conditional reg mov. That has few dependencies for good ILP, but is a |
2477 // longer sequence. | 2567 // longer sequence. |
2478 // | 2568 // |
2479 // So, we are going with the GCC version since it's usually better (except | 2569 // So, we are going with the GCC version since it's usually better (except |
2480 // perhaps for eq/ne). We could revisit special-casing eq/ne later. | 2570 // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
2481 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2571 |
2482 Constant *One = Ctx->getConstantInt32(1); | |
2483 if (Src0->getType() == IceType_i64) { | 2572 if (Src0->getType() == IceType_i64) { |
2484 InstIcmp::ICond Conditon = Inst->getCondition(); | 2573 InstIcmp::ICond Conditon = Inst->getCondition(); |
2485 size_t Index = static_cast<size_t>(Conditon); | 2574 size_t Index = static_cast<size_t>(Conditon); |
2486 assert(Index < llvm::array_lengthof(TableIcmp64)); | 2575 assert(Index < llvm::array_lengthof(TableIcmp64)); |
2487 Variable *Src0Lo, *Src0Hi; | 2576 Variable *Src0Lo, *Src0Hi; |
2488 Operand *Src1LoRF, *Src1HiRF; | 2577 Operand *Src1LoRF, *Src1HiRF; |
2489 if (TableIcmp64[Index].Swapped) { | 2578 if (TableIcmp64[Index].Swapped) { |
2490 Src0Lo = legalizeToReg(loOperand(Src1)); | 2579 Src0Lo = legalizeToReg(loOperand(Src1)); |
2491 Src0Hi = legalizeToReg(hiOperand(Src1)); | 2580 Src0Hi = legalizeToReg(hiOperand(Src1)); |
2492 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 2581 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
2493 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 2582 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
2494 } else { | 2583 } else { |
2495 Src0Lo = legalizeToReg(loOperand(Src0)); | 2584 Src0Lo = legalizeToReg(loOperand(Src0)); |
2496 Src0Hi = legalizeToReg(hiOperand(Src0)); | 2585 Src0Hi = legalizeToReg(hiOperand(Src0)); |
2497 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | 2586 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
2498 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | 2587 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
2499 } | 2588 } |
2500 Variable *T = makeReg(IceType_i32); | |
2501 if (TableIcmp64[Index].IsSigned) { | 2589 if (TableIcmp64[Index].IsSigned) { |
2502 Variable *ScratchReg = makeReg(IceType_i32); | 2590 Variable *ScratchReg = makeReg(IceType_i32); |
2503 _cmp(Src0Lo, Src1LoRF); | 2591 _cmp(Src0Lo, Src1LoRF); |
2504 _sbcs(ScratchReg, Src0Hi, Src1HiRF); | 2592 _sbcs(ScratchReg, Src0Hi, Src1HiRF); |
2505 // ScratchReg isn't going to be used, but we need the side-effect of | 2593 // ScratchReg isn't going to be used, but we need the side-effect of |
2506 // setting flags from this operation. | 2594 // setting flags from this operation. |
2507 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 2595 Context.insert(InstFakeUse::create(Func, ScratchReg)); |
2508 } else { | 2596 } else { |
2509 _cmp(Src0Hi, Src1HiRF); | 2597 _cmp(Src0Hi, Src1HiRF); |
2510 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); | 2598 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); |
2511 } | 2599 } |
2512 _mov(T, One, TableIcmp64[Index].C1); | 2600 *CondIfTrue = TableIcmp64[Index].C1; |
2513 _mov_redefined(T, Zero, TableIcmp64[Index].C2); | 2601 *CondIfFalse = TableIcmp64[Index].C2; |
2514 _mov(Dest, T); | |
2515 return; | 2602 return; |
2516 } | 2603 } |
2517 | 2604 |
2518 // a=icmp cond b, c ==> | 2605 // a=icmp cond b, c ==> |
2519 // GCC does: | 2606 // GCC does: |
2520 // <u/s>xtb tb, b | 2607 // <u/s>xtb tb, b |
2521 // <u/s>xtb tc, c | 2608 // <u/s>xtb tc, c |
2522 // cmp tb, tc | 2609 // cmp tb, tc |
2523 // mov.C1 t, #0 | 2610 // mov.C1 t, #0 |
2524 // mov.C2 t, #1 | 2611 // mov.C2 t, #1 |
(...skipping 16 matching lines...) Expand all Loading... | |
2541 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For | 2628 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For |
2542 // the unsigned case, for some reason it does similar to GCC and does a uxtb | 2629 // the unsigned case, for some reason it does similar to GCC and does a uxtb |
2543 // first. It's not clear to me why that special-casing is needed. | 2630 // first. It's not clear to me why that special-casing is needed. |
2544 // | 2631 // |
2545 // We'll go with the LLVM way for now, since it's shorter and has just as few | 2632 // We'll go with the LLVM way for now, since it's shorter and has just as few |
2546 // dependencies. | 2633 // dependencies. |
2547 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 2634 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
2548 assert(ShiftAmt >= 0); | 2635 assert(ShiftAmt >= 0); |
2549 Constant *ShiftConst = nullptr; | 2636 Constant *ShiftConst = nullptr; |
2550 Variable *Src0R = nullptr; | 2637 Variable *Src0R = nullptr; |
2551 Variable *T = makeReg(IceType_i32); | |
2552 if (ShiftAmt) { | 2638 if (ShiftAmt) { |
2553 ShiftConst = Ctx->getConstantInt32(ShiftAmt); | 2639 ShiftConst = Ctx->getConstantInt32(ShiftAmt); |
2554 Src0R = makeReg(IceType_i32); | 2640 Src0R = makeReg(IceType_i32); |
2555 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); | 2641 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); |
2556 } else { | 2642 } else { |
2557 Src0R = legalizeToReg(Src0); | 2643 Src0R = legalizeToReg(Src0); |
2558 } | 2644 } |
2559 _mov(T, Zero); | |
2560 if (ShiftAmt) { | 2645 if (ShiftAmt) { |
2561 Variable *Src1R = legalizeToReg(Src1); | 2646 Variable *Src1R = legalizeToReg(Src1); |
2562 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( | 2647 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( |
2563 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); | 2648 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); |
2564 _cmp(Src0R, Src1RShifted); | 2649 _cmp(Src0R, Src1RShifted); |
2565 } else { | 2650 } else { |
2566 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | 2651 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
2567 _cmp(Src0R, Src1RF); | 2652 _cmp(Src0R, Src1RF); |
2568 } | 2653 } |
2569 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); | 2654 *CondIfTrue = getIcmp32Mapping(Inst->getCondition()); |
2655 *CondIfFalse = InstARM32::getOppositeCondition(*CondIfTrue); | |
2656 } | |
2657 | |
2658 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { | |
2659 Variable *Dest = Inst->getDest(); | |
2660 | |
2661 if (isVectorType(Dest->getType())) { | |
2662 Variable *T = makeReg(Dest->getType()); | |
2663 Context.insert(InstFakeDef::create(Func, T)); | |
2664 _mov(Dest, T); | |
2665 UnimplementedError(Func->getContext()->getFlags()); | |
2666 return; | |
2667 } | |
2668 | |
2669 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
Jim Stichnoth
2015/11/03 20:04:28
_0 and _1 ?
John
2015/11/05 20:25:39
Done.
| |
2670 Constant *One = Ctx->getConstantInt32(1); | |
2671 Variable *T = makeReg(IceType_i32); | |
2672 | |
2673 CondARM32::Cond CondIfTrue, CondIfFalse; | |
2674 lowerIcmpCond(Inst, &CondIfTrue, &CondIfFalse); | |
2675 | |
2676 _mov(T, Zero, CondIfFalse); | |
2677 _mov_redefined(T, One, CondIfTrue); | |
2570 _mov(Dest, T); | 2678 _mov(Dest, T); |
2679 | |
2571 return; | 2680 return; |
2572 } | 2681 } |
2573 | 2682 |
2574 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { | 2683 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { |
2575 (void)Inst; | 2684 (void)Inst; |
2576 UnimplementedError(Func->getContext()->getFlags()); | 2685 UnimplementedError(Func->getContext()->getFlags()); |
2577 } | 2686 } |
2578 | 2687 |
2579 namespace { | 2688 namespace { |
2580 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 2689 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
(...skipping 741 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3322 Operand *SrcF = Inst->getFalseOperand(); | 3431 Operand *SrcF = Inst->getFalseOperand(); |
3323 Operand *Condition = Inst->getCondition(); | 3432 Operand *Condition = Inst->getCondition(); |
3324 | 3433 |
3325 if (isVectorType(DestTy)) { | 3434 if (isVectorType(DestTy)) { |
3326 Variable *T = makeReg(DestTy); | 3435 Variable *T = makeReg(DestTy); |
3327 Context.insert(InstFakeDef::create(Func, T)); | 3436 Context.insert(InstFakeDef::create(Func, T)); |
3328 _mov(Dest, T); | 3437 _mov(Dest, T); |
3329 UnimplementedError(Func->getContext()->getFlags()); | 3438 UnimplementedError(Func->getContext()->getFlags()); |
3330 return; | 3439 return; |
3331 } | 3440 } |
3332 // TODO(jvoung): handle folding opportunities. | 3441 |
3333 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t | 3442 CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse; |
3334 Variable *CmpOpnd0 = legalizeToReg(Condition); | 3443 if (!_mov_i1_to_flags(Condition, &CondIfTrue0, &CondIfTrue1, &CondIfFalse)) { |
3335 Type CmpOpnd0Ty = CmpOpnd0->getType(); | 3444 // "Condition" was not folded. |
3336 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32); | 3445 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t |
3337 assert(CmpOpnd0Ty == IceType_i1); | 3446 Variable *CmpOpnd0 = legalizeToReg(Condition); |
3338 if (CmpOpnd0Ty != IceType_i32) | 3447 Type CmpOpnd0Ty = CmpOpnd0->getType(); |
3339 _uxt(CmpOpnd0, CmpOpnd0); | 3448 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32); |
3340 _cmp(CmpOpnd0, CmpOpnd1); | 3449 assert(CmpOpnd0Ty == IceType_i1); |
3341 static constexpr CondARM32::Cond Cond = CondARM32::NE; | 3450 if (CmpOpnd0Ty != IceType_i32) |
3451 _uxt(CmpOpnd0, CmpOpnd0); | |
3452 _cmp(CmpOpnd0, CmpOpnd1); | |
3453 CondIfTrue0 = CondARM32::NE; | |
3454 CondIfTrue1 = CondARM32::kNone; | |
3455 CondIfFalse = CondARM32::EQ; | |
3456 } | |
3457 | |
3342 if (DestTy == IceType_i64) { | 3458 if (DestTy == IceType_i64) { |
3343 SrcT = legalizeUndef(SrcT); | 3459 SrcT = legalizeUndef(SrcT); |
3344 SrcF = legalizeUndef(SrcF); | 3460 SrcF = legalizeUndef(SrcF); |
3345 // Set the low portion. | 3461 // Set the low portion. |
3346 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3462 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3463 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); | |
3347 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex); | 3464 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex); |
3348 Variable *TLo = makeReg(SrcFLo->getType()); | 3465 Variable *TLo = makeReg(SrcFLo->getType()); |
3349 _mov(TLo, SrcFLo); | 3466 bool RedefineTLo = false; |
3350 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); | 3467 if (CondIfFalse != CondARM32::kNone) { |
3351 _mov_redefined(TLo, SrcTLo, Cond); | 3468 _mov(TLo, SrcFLo, CondIfFalse); |
3469 RedefineTLo = true; | |
3470 } | |
3471 if (CondIfTrue0 != CondARM32::kNone) { | |
3472 if (!RedefineTLo) | |
3473 _mov(TLo, SrcTLo, CondIfTrue0); | |
3474 else | |
3475 _mov_redefined(TLo, SrcTLo, CondIfTrue0); | |
3476 RedefineTLo = true; | |
3477 } | |
3478 if (CondIfTrue1 != CondARM32::kNone) { | |
3479 assert(RedefineTLo); | |
3480 _mov_redefined(TLo, SrcTLo, CondIfTrue1); | |
3481 } | |
3352 _mov(DestLo, TLo); | 3482 _mov(DestLo, TLo); |
3483 | |
3353 // Set the high portion. | 3484 // Set the high portion. |
3354 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3485 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3486 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); | |
3355 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex); | 3487 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex); |
3356 Variable *THi = makeReg(SrcFHi->getType()); | 3488 Variable *THi = makeReg(SrcFHi->getType()); |
3357 _mov(THi, SrcFHi); | 3489 bool RedefineTHi = false; |
3358 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); | 3490 if (CondIfFalse != CondARM32::kNone) { |
3359 _mov_redefined(THi, SrcTHi, Cond); | 3491 _mov(THi, SrcFHi, CondIfFalse); |
3492 RedefineTHi = true; | |
3493 } | |
3494 if (CondIfTrue0 != CondARM32::kNone) { | |
3495 if (!RedefineTHi) | |
3496 _mov(THi, SrcTHi, CondIfTrue0); | |
3497 else | |
3498 _mov_redefined(THi, SrcTHi, CondIfTrue0); | |
3499 RedefineTHi = true; | |
3500 } | |
3501 if (CondIfTrue1 != CondARM32::kNone) { | |
3502 assert(RedefineTHi); | |
3503 _mov_redefined(THi, SrcTHi, CondIfTrue1); | |
3504 } | |
3360 _mov(DestHi, THi); | 3505 _mov(DestHi, THi); |
3361 return; | 3506 return; |
3362 } | 3507 } |
3363 | 3508 |
3364 if (isFloatingType(DestTy)) { | 3509 if (isFloatingType(DestTy)) { |
3510 SrcT = legalizeToReg(SrcT); | |
3511 SrcF = legalizeToReg(SrcF); | |
3365 Variable *T = makeReg(DestTy); | 3512 Variable *T = makeReg(DestTy); |
3366 SrcF = legalizeToReg(SrcF); | |
3367 assert(DestTy == SrcF->getType()); | 3513 assert(DestTy == SrcF->getType()); |
3368 _mov(T, SrcF); | 3514 bool RedefineT = false; |
3369 SrcT = legalizeToReg(SrcT); | 3515 if (CondIfFalse != CondARM32::kNone) { |
3516 _mov(T, SrcF, CondIfFalse); | |
3517 RedefineT = true; | |
3518 } | |
3519 if (CondIfTrue0 != CondARM32::kNone) { | |
3520 if (!RedefineT) | |
3521 _mov(T, SrcT, CondIfTrue0); | |
3522 else | |
3523 _mov_redefined(T, SrcT, CondIfTrue0); | |
3524 RedefineT = true; | |
3525 } | |
3526 if (CondIfTrue1 != CondARM32::kNone) { | |
3527 assert(RedefineT); | |
3528 _mov_redefined(T, SrcT, CondIfTrue1); | |
3529 } | |
3370 assert(DestTy == SrcT->getType()); | 3530 assert(DestTy == SrcT->getType()); |
3371 _mov(T, SrcT, Cond); | |
3372 _set_dest_redefined(); | |
3373 _mov(Dest, T); | 3531 _mov(Dest, T); |
3374 return; | 3532 return; |
3375 } | 3533 } |
3376 | 3534 |
3535 Variable *T = makeReg(SrcF->getType()); | |
3536 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex); | |
3377 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); | 3537 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); |
3378 Variable *T = makeReg(SrcF->getType()); | 3538 bool RedefineT = false; |
3379 _mov(T, SrcF); | 3539 if (CondIfFalse != CondARM32::kNone) { |
3380 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex); | 3540 _mov(T, SrcF, CondIfFalse); |
3381 _mov_redefined(T, SrcT, Cond); | 3541 RedefineT = true; |
3542 } | |
3543 if (CondIfTrue0 != CondARM32::kNone) { | |
3544 if (!RedefineT) | |
3545 _mov(T, SrcT, CondIfTrue0); | |
3546 else | |
3547 _mov_redefined(T, SrcT, CondIfTrue0); | |
3548 RedefineT = true; | |
3549 } | |
3550 if (CondIfTrue1 != CondARM32::kNone) { | |
3551 assert(RedefineT); | |
3552 _mov_redefined(T, SrcT, CondIfTrue1); | |
3553 } | |
3382 _mov(Dest, T); | 3554 _mov(Dest, T); |
3383 } | 3555 } |
3384 | 3556 |
3385 void TargetARM32::lowerStore(const InstStore *Inst) { | 3557 void TargetARM32::lowerStore(const InstStore *Inst) { |
3386 Operand *Value = Inst->getData(); | 3558 Operand *Value = Inst->getData(); |
3387 Operand *Addr = Inst->getAddr(); | 3559 Operand *Addr = Inst->getAddr(); |
3388 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); | 3560 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); |
3389 Type Ty = NewAddr->getType(); | 3561 Type Ty = NewAddr->getType(); |
3390 | 3562 |
3391 if (Ty == IceType_i64) { | 3563 if (Ty == IceType_i64) { |
(...skipping 387 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3779 | 3951 |
3780 void TargetARM32::emit(const ConstantDouble *C) const { | 3952 void TargetARM32::emit(const ConstantDouble *C) const { |
3781 (void)C; | 3953 (void)C; |
3782 UnimplementedError(Ctx->getFlags()); | 3954 UnimplementedError(Ctx->getFlags()); |
3783 } | 3955 } |
3784 | 3956 |
3785 void TargetARM32::emit(const ConstantUndef *) const { | 3957 void TargetARM32::emit(const ConstantUndef *) const { |
3786 llvm::report_fatal_error("undef value encountered by emitter."); | 3958 llvm::report_fatal_error("undef value encountered by emitter."); |
3787 } | 3959 } |
3788 | 3960 |
3961 void TargetARM32::lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue, | |
3962 CondARM32::Cond *CondIfFalse) { | |
3963 Operand *_1 = Ctx->getConstantInt32(1); | |
3964 Variable *SrcR = | |
3965 legalizeToReg(Src->getType() == IceType_i64 ? loOperand(Src) : Src); | |
3966 _tst(SrcR, _1); | |
3967 *CondIfTrue = CondARM32::NE; /* NE <-> APSR.Z == 0 */ | |
Jim Stichnoth
2015/11/03 20:04:28
Why the /* */ style comments?
John
2015/11/05 20:25:39
I don't know. Done.
| |
3968 *CondIfFalse = CondARM32::EQ; /* EQ <-> APSR.Z == 1 */ | |
3969 } | |
3970 | |
3971 bool TargetARM32::_mov_i1_to_flags(Operand *Boolean, | |
3972 CondARM32::Cond *CondIfTrue0, | |
3973 CondARM32::Cond *CondIfTrue1, | |
3974 CondARM32::Cond *CondIfFalse) { | |
3975 *CondIfTrue0 = CondARM32::kNone; | |
3976 *CondIfTrue1 = CondARM32::kNone; | |
3977 *CondIfFalse = CondARM32::AL; | |
3978 bool FoldOK = false; | |
3979 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { | |
3980 if (const auto *IcmpProducer = llvm::dyn_cast<InstIcmp>(Producer)) { | |
3981 lowerIcmpCond(IcmpProducer, CondIfTrue0, CondIfFalse); | |
3982 FoldOK = true; | |
3983 } else if (const auto *FcmpProducer = llvm::dyn_cast<InstFcmp>(Producer)) { | |
3984 lowerFcmpCond(FcmpProducer, CondIfTrue0, CondIfTrue1, CondIfFalse); | |
3985 FoldOK = true; | |
3986 } else if (const auto *CastProducer = llvm::dyn_cast<InstCast>(Producer)) { | |
3987 assert(CastProducer->getCastKind() == InstCast::Trunc); | |
3988 lowerTruncToFlags(CastProducer->getSrc(0), CondIfTrue0, CondIfFalse); | |
3989 FoldOK = true; | |
3990 } | |
3991 } | |
3992 return FoldOK; | |
3993 } | |
3994 | |
3995 namespace { | |
3996 namespace BoolFolding { | |
3997 bool shouldTrackProducer(const Inst &Instr) { | |
3998 switch (static_cast<uint32_t>(Instr.getKind())) { | |
3999 case Inst::Icmp: | |
4000 return true; | |
4001 case Inst::Fcmp: | |
4002 return true; | |
4003 } | |
4004 if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) { | |
4005 switch (static_cast<uint32_t>(Cast->getCastKind())) { | |
4006 case InstCast::Trunc: | |
4007 return true; | |
4008 } | |
4009 } | |
4010 return false; | |
4011 } | |
4012 | |
4013 bool isValidConsumer(const Inst &Instr) { | |
4014 switch (static_cast<uint32_t>(Instr.getKind())) { | |
4015 case Inst::Br: | |
4016 return true; | |
4017 case Inst::Select: | |
4018 return !isVectorType(Instr.getDest()->getType()); | |
4019 } | |
4020 if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) { | |
4021 switch (static_cast<uint32_t>(Cast->getCastKind())) { | |
4022 case InstCast::Sext: | |
4023 return !isVectorType(Instr.getDest()->getType()); | |
4024 case InstCast::Zext: | |
4025 return !isVectorType(Instr.getDest()->getType()); | |
4026 } | |
4027 } | |
4028 return false; | |
4029 } | |
4030 } // end of namespace BoolFolding | |
4031 } // end of anonymous namespace | |
4032 | |
4033 void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { | |
4034 for (Inst &Instr : Node->getInsts()) { | |
4035 // Check whether Instr is a valid producer. | |
4036 Variable *Dest = Instr.getDest(); | |
4037 if (!Instr.isDeleted() // only consider non-deleted instructions; and | |
4038 && Dest // only instructions with an actual dest var; and | |
4039 && Dest->getType() == IceType_i1 // only bool-type dest vars; and | |
4040 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. | |
4041 KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr)); | |
4042 } | |
4043 // Check each src variable against the map. | |
4044 FOREACH_VAR_IN_INST(Var, Instr) { | |
4045 SizeT VarNum = Var->getIndex(); | |
4046 auto ComputationIter = KnownComputations.find(VarNum); | |
4047 if (ComputationIter == KnownComputations.end()) { | |
4048 continue; | |
4049 } | |
4050 | |
4051 if (IndexOfVarOperandInInst(Var) != 0 || | |
4052 !BoolFolding::isValidConsumer(Instr)) { | |
4053 // All valid consumers use Var as the first source operand | |
4054 KnownComputations.erase(VarNum); | |
4055 continue; | |
4056 } | |
4057 | |
4058 if (Instr.isLastUse(Var)) { | |
4059 ComputationIter->second.IsLiveOut = false; | |
4060 } | |
4061 } | |
4062 } | |
4063 | |
4064 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); | |
4065 Iter != End;) { | |
4066 // Disable the folding if its dest may be live beyond this block. | |
4067 if (Iter->second.IsLiveOut) { | |
4068 Iter = KnownComputations.erase(Iter); | |
4069 continue; | |
4070 } | |
4071 | |
4072 // Mark as "dead" rather than outright deleting. This is so that other | |
4073 // peephole style optimizations during or before lowering have access to | |
4074 // this instruction in undeleted form. See for example | |
4075 // tryOptimizedCmpxchgCmpBr(). | |
4076 Iter->second.Instr->setDead(); | |
4077 ++Iter; | |
4078 } | |
4079 } | |
4080 | |
3789 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx) | 4081 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx) |
3790 : TargetDataLowering(Ctx) {} | 4082 : TargetDataLowering(Ctx) {} |
3791 | 4083 |
3792 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars, | 4084 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars, |
3793 const IceString &SectionSuffix) { | 4085 const IceString &SectionSuffix) { |
3794 switch (Ctx->getFlags().getOutFileType()) { | 4086 switch (Ctx->getFlags().getOutFileType()) { |
3795 case FT_Elf: { | 4087 case FT_Elf: { |
3796 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | 4088 ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
3797 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix); | 4089 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix); |
3798 } break; | 4090 } break; |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3970 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 4262 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
3971 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 4263 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
3972 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 4264 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
3973 } | 4265 } |
3974 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 4266 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
3975 // However, for compatibility with current NaCl LLVM, don't claim that. | 4267 // However, for compatibility with current NaCl LLVM, don't claim that. |
3976 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 4268 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
3977 } | 4269 } |
3978 | 4270 |
3979 } // end of namespace Ice | 4271 } // end of namespace Ice |
OLD | NEW |