Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(374)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1414883007: Subzero. ARM32. Implements bool folding. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments && pulls. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/assembler/arm32/branch-mult-fwd.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// This file implements the TargetLoweringARM32 class, which consists almost 11 /// This file implements the TargetLoweringARM32 class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction. 12 /// entirely of the lowering sequence for each high-level instruction.
13 /// 13 ///
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 #include "IceTargetLoweringARM32.h" 15 #include "IceTargetLoweringARM32.h"
16 16
17 #include "IceCfg.h" 17 #include "IceCfg.h"
18 #include "IceCfgNode.h" 18 #include "IceCfgNode.h"
19 #include "IceClFlags.h" 19 #include "IceClFlags.h"
20 #include "IceDefs.h" 20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h" 21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h" 22 #include "IceGlobalInits.h"
23 #include "IceInstARM32.def" 23 #include "IceInstARM32.def"
24 #include "IceInstARM32.h" 24 #include "IceInstARM32.h"
25 #include "IceInstVarIter.h"
25 #include "IceLiveness.h" 26 #include "IceLiveness.h"
26 #include "IceOperand.h" 27 #include "IceOperand.h"
27 #include "IcePhiLoweringImpl.h" 28 #include "IcePhiLoweringImpl.h"
28 #include "IceRegistersARM32.h" 29 #include "IceRegistersARM32.h"
29 #include "IceTargetLoweringARM32.def" 30 #include "IceTargetLoweringARM32.def"
30 #include "IceUtils.h" 31 #include "IceUtils.h"
31 #include "llvm/Support/MathExtras.h" 32 #include "llvm/Support/MathExtras.h"
32 33
33 #include <algorithm> 34 #include <algorithm>
34 #include <utility> 35 #include <utility>
(...skipping 1761 matching lines...) Expand 10 before | Expand all | Expand 10 after
1796 _mov(Dest, SrcR); 1797 _mov(Dest, SrcR);
1797 } else if (isFloatingType(Dest->getType())) { 1798 } else if (isFloatingType(Dest->getType())) {
1798 Variable *SrcR = legalizeToReg(NewSrc); 1799 Variable *SrcR = legalizeToReg(NewSrc);
1799 _mov(Dest, SrcR); 1800 _mov(Dest, SrcR);
1800 } else { 1801 } else {
1801 _mov(Dest, NewSrc); 1802 _mov(Dest, NewSrc);
1802 } 1803 }
1803 } 1804 }
1804 } 1805 }
1805 1806
1806 void TargetARM32::lowerBr(const InstBr *Inst) { 1807 void TargetARM32::lowerBr(const InstBr *Instr) {
1807 if (Inst->isUnconditional()) { 1808 if (Instr->isUnconditional()) {
1808 _br(Inst->getTargetUnconditional()); 1809 _br(Instr->getTargetUnconditional());
1809 return; 1810 return;
1810 } 1811 }
1811 Operand *Cond = Inst->getCondition(); 1812 Operand *Cond = Instr->getCondition();
1812 // TODO(jvoung): Handle folding opportunities.
1813 1813
1814 Type Ty = Cond->getType(); 1814 CondARM32::Cond BrCondTrue0 = CondARM32::NE;
1815 Variable *Src0R = legalizeToReg(Cond); 1815 CondARM32::Cond BrCondTrue1 = CondARM32::kNone;
1816 assert(Ty == IceType_i1); 1816 CondARM32::Cond BrCondFalse = CondARM32::kNone;
1817 if (Ty != IceType_i32) 1817 if (!_mov_i1_to_flags(Cond, &BrCondTrue0, &BrCondTrue1, &BrCondFalse)) {
1818 _uxt(Src0R, Src0R); 1818 // "Cond" was not folded.
1819 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1819 Type Ty = Cond->getType();
1820 _cmp(Src0R, Zero); 1820 Variable *Src0R = legalizeToReg(Cond);
1821 _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE); 1821 assert(Ty == IceType_i1);
1822 if (Ty != IceType_i32)
1823 _uxt(Src0R, Src0R);
1824 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1825 _cmp(Src0R, _0);
1826 BrCondTrue0 = CondARM32::NE;
1827 }
1828
1829 if (BrCondTrue1 != CondARM32::kNone) {
1830 _br(Instr->getTargetTrue(), BrCondTrue1);
1831 }
1832
1833 if (BrCondTrue0 == CondARM32::kNone) {
1834 assert(BrCondTrue1 == CondARM32::kNone);
1835 _br(Instr->getTargetFalse());
1836 return;
1837 }
1838
1839 if (BrCondTrue0 == CondARM32::AL) {
1840 assert(BrCondTrue1 == CondARM32::kNone);
1841 assert(BrCondFalse == CondARM32::kNone);
1842 _br(Instr->getTargetTrue());
1843 return;
1844 }
1845
1846 _br(Instr->getTargetTrue(), Instr->getTargetFalse(), BrCondTrue0);
1822 } 1847 }
1823 1848
1824 void TargetARM32::lowerCall(const InstCall *Instr) { 1849 void TargetARM32::lowerCall(const InstCall *Instr) {
1825 MaybeLeafFunc = false; 1850 MaybeLeafFunc = false;
1826 NeedsStackAlignment = true; 1851 NeedsStackAlignment = true;
1827 1852
1828 // Assign arguments to registers and stack. Also reserve stack. 1853 // Assign arguments to registers and stack. Also reserve stack.
1829 TargetARM32::CallingConv CC; 1854 TargetARM32::CallingConv CC;
1830 // Pair of Arg Operand -> GPR number assignments. 1855 // Pair of Arg Operand -> GPR number assignments.
1831 llvm::SmallVector<std::pair<Operand *, int32_t>, 1856 llvm::SmallVector<std::pair<Operand *, int32_t>,
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after
2043 UnimplementedError(Func->getContext()->getFlags()); 2068 UnimplementedError(Func->getContext()->getFlags());
2044 } else if (Dest->getType() == IceType_i64) { 2069 } else if (Dest->getType() == IceType_i64) {
2045 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 2070 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
2046 Constant *ShiftAmt = Ctx->getConstantInt32(31); 2071 Constant *ShiftAmt = Ctx->getConstantInt32(31);
2047 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2072 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2048 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2073 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2049 Variable *T_Lo = makeReg(DestLo->getType()); 2074 Variable *T_Lo = makeReg(DestLo->getType());
2050 if (Src0->getType() == IceType_i32) { 2075 if (Src0->getType() == IceType_i32) {
2051 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); 2076 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2052 _mov(T_Lo, Src0RF); 2077 _mov(T_Lo, Src0RF);
2053 } else if (Src0->getType() == IceType_i1) { 2078 } else if (Src0->getType() != IceType_i1) {
2054 Variable *Src0R = legalizeToReg(Src0);
2055 _lsl(T_Lo, Src0R, ShiftAmt);
2056 _asr(T_Lo, T_Lo, ShiftAmt);
2057 } else {
2058 Variable *Src0R = legalizeToReg(Src0); 2079 Variable *Src0R = legalizeToReg(Src0);
2059 _sxt(T_Lo, Src0R); 2080 _sxt(T_Lo, Src0R);
2081 } else {
2082 CondARM32::Cond CondTrue0, CondTrue1, CondFalse;
2083 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {
2084 // Handle bool folding.
2085 Constant *_0 = Ctx->getConstantZero(IceType_i32);
2086 Operand *_m1 =
2087 legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex);
2088 _cmov(T_Lo, _m1, CondTrue0, CondTrue1, _0, CondFalse);
2089 } else {
2090 Variable *Src0R = legalizeToReg(Src0);
2091 _lsl(T_Lo, Src0R, ShiftAmt);
2092 _asr(T_Lo, T_Lo, ShiftAmt);
2093 }
2060 } 2094 }
2061 _mov(DestLo, T_Lo); 2095 _mov(DestLo, T_Lo);
2062 Variable *T_Hi = makeReg(DestHi->getType()); 2096 Variable *T_Hi = makeReg(DestHi->getType());
2063 if (Src0->getType() != IceType_i1) { 2097 if (Src0->getType() != IceType_i1) {
2064 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo, 2098 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
2065 OperandARM32::ASR, ShiftAmt)); 2099 OperandARM32::ASR, ShiftAmt));
2066 } else { 2100 } else {
2067 // For i1, the asr instruction is already done above. 2101 // For i1, the asr instruction is already done above.
2068 _mov(T_Hi, T_Lo); 2102 _mov(T_Hi, T_Lo);
2069 } 2103 }
2070 _mov(DestHi, T_Hi); 2104 _mov(DestHi, T_Hi);
2071 } else if (Src0->getType() == IceType_i1) { 2105 } else if (Src0->getType() != IceType_i1) {
2072 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
2073 // lsl t1, src_reg, 31
2074 // asr t1, t1, 31
2075 // dst = t1
2076 Variable *Src0R = legalizeToReg(Src0);
2077 Constant *ShiftAmt = Ctx->getConstantInt32(31);
2078 Variable *T = makeReg(Dest->getType());
2079 _lsl(T, Src0R, ShiftAmt);
2080 _asr(T, T, ShiftAmt);
2081 _mov(Dest, T);
2082 } else {
2083 // t1 = sxt src; dst = t1 2106 // t1 = sxt src; dst = t1
2084 Variable *Src0R = legalizeToReg(Src0); 2107 Variable *Src0R = legalizeToReg(Src0);
2085 Variable *T = makeReg(Dest->getType()); 2108 Variable *T = makeReg(Dest->getType());
2086 _sxt(T, Src0R); 2109 _sxt(T, Src0R);
2087 _mov(Dest, T); 2110 _mov(Dest, T);
2111 } else {
2112 Variable *T = makeReg(Dest->getType());
2113 CondARM32::Cond CondTrue0, CondTrue1, CondFalse;
2114 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {
2115 // Handle bool folding.
2116 Constant *_0 = Ctx->getConstantZero(IceType_i32);
2117 Operand *_m1 =
2118 legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex);
2119 _cmov(T, _m1, CondTrue0, CondTrue1, _0, CondFalse);
2120 } else {
2121 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
2122 // lsl t1, src_reg, 31
2123 // asr t1, t1, 31
2124 // dst = t1
2125 Variable *Src0R = legalizeToReg(Src0);
2126 Constant *ShiftAmt = Ctx->getConstantInt32(31);
2127 _lsl(T, Src0R, ShiftAmt);
2128 _asr(T, T, ShiftAmt);
2129 }
2130 _mov(Dest, T);
2088 } 2131 }
2089 break; 2132 break;
2090 } 2133 }
2091 case InstCast::Zext: { 2134 case InstCast::Zext: {
2092 if (isVectorType(Dest->getType())) { 2135 if (isVectorType(Dest->getType())) {
2093 Variable *T = makeReg(Dest->getType()); 2136 Variable *T = makeReg(Dest->getType());
2094 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); 2137 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
2095 _mov(Dest, T); 2138 _mov(Dest, T);
2096 UnimplementedError(Func->getContext()->getFlags()); 2139 UnimplementedError(Func->getContext()->getFlags());
2097 } else if (Dest->getType() == IceType_i64) { 2140 } else if (Dest->getType() == IceType_i64) {
2098 // t1=uxtb src; dst.lo=t1; dst.hi=0 2141 // t1=uxtb src; dst.lo=t1; dst.hi=0
2099 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2142 Constant *_0 = Ctx->getConstantZero(IceType_i32);
2143 Constant *_1 = Ctx->getConstantInt32(1);
2100 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2144 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2101 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2145 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2102 Variable *T_Lo = makeReg(DestLo->getType()); 2146 Variable *T_Lo = makeReg(DestLo->getType());
2147
2148 CondARM32::Cond CondTrue0, CondTrue1, CondFalse;
2149 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {
2150 // Handle folding opportunities.
2151 Variable *T_Hi = makeReg(DestLo->getType());
2152 _mov(T_Hi, _0);
2153 _mov(DestHi, T_Hi);
2154 _cmov(T_Lo, _1, CondTrue0, CondTrue1, _0, CondFalse);
2155 _mov(DestLo, T_Lo);
2156 return;
2157 }
2158
2103 // i32 and i1 can just take up the whole register. i32 doesn't need uxt, 2159 // i32 and i1 can just take up the whole register. i32 doesn't need uxt,
2104 // while i1 will have an and mask later anyway. 2160 // while i1 will have an and mask later anyway.
2105 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { 2161 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
2106 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); 2162 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2107 _mov(T_Lo, Src0RF); 2163 _mov(T_Lo, Src0RF);
2108 } else { 2164 } else {
2109 Variable *Src0R = legalizeToReg(Src0); 2165 Variable *Src0R = legalizeToReg(Src0);
2110 _uxt(T_Lo, Src0R); 2166 _uxt(T_Lo, Src0R);
2111 } 2167 }
2112 if (Src0->getType() == IceType_i1) { 2168 if (Src0->getType() == IceType_i1) {
2113 Constant *One = Ctx->getConstantInt32(1); 2169 Constant *One = Ctx->getConstantInt32(1);
2114 _and(T_Lo, T_Lo, One); 2170 _and(T_Lo, T_Lo, One);
2115 } 2171 }
2116 _mov(DestLo, T_Lo); 2172 _mov(DestLo, T_Lo);
2117 Variable *T_Hi = makeReg(DestLo->getType()); 2173 Variable *T_Hi = makeReg(DestLo->getType());
2118 _mov(T_Hi, Zero); 2174 _mov(T_Hi, _0);
2119 _mov(DestHi, T_Hi); 2175 _mov(DestHi, T_Hi);
2120 } else if (Src0->getType() == IceType_i1) { 2176 } else if (Src0->getType() == IceType_i1) {
2177 Constant *_1 = Ctx->getConstantInt32(1);
2178 Variable *T = makeReg(Dest->getType());
2179
2180 CondARM32::Cond CondTrue0, CondTrue1, CondFalse;
2181 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {
2182 // Handle folding opportunities.
2183 Constant *_0 = Ctx->getConstantZero(IceType_i32);
2184 _cmov(T, _1, CondTrue0, CondTrue1, _0, CondFalse);
2185 _mov(Dest, T);
2186 return;
2187 }
2188
2121 // t = Src0; t &= 1; Dest = t 2189 // t = Src0; t &= 1; Dest = t
2122 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); 2190 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2123 Constant *One = Ctx->getConstantInt32(1);
2124 Variable *T = makeReg(Dest->getType());
2125 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt 2191 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt
2126 // requires the source to be a register so could have required a _mov 2192 // requires the source to be a register so could have required a _mov
2127 // from legalize anyway. 2193 // from legalize anyway.
2128 _mov(T, Src0RF); 2194 _mov(T, Src0RF);
2129 _and(T, T, One); 2195 _and(T, T, _1);
2130 _mov(Dest, T); 2196 _mov(Dest, T);
2131 } else { 2197 } else {
2132 // t1 = uxt src; dst = t1 2198 // t1 = uxt src; dst = t1
2133 Variable *Src0R = legalizeToReg(Src0); 2199 Variable *Src0R = legalizeToReg(Src0);
2134 Variable *T = makeReg(Dest->getType()); 2200 Variable *T = makeReg(Dest->getType());
2135 _uxt(T, Src0R); 2201 _uxt(T, Src0R);
2136 _mov(Dest, T); 2202 _mov(Dest, T);
2137 } 2203 }
2138 break; 2204 break;
2139 } 2205 }
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after
2390 CondARM32::Cond CC1; 2456 CondARM32::Cond CC1;
2391 } TableFcmp[] = { 2457 } TableFcmp[] = {
2392 #define X(val, CC0, CC1) \ 2458 #define X(val, CC0, CC1) \
2393 { CondARM32::CC0, CondARM32::CC1 } \ 2459 { CondARM32::CC0, CondARM32::CC1 } \
2394 , 2460 ,
2395 FCMPARM32_TABLE 2461 FCMPARM32_TABLE
2396 #undef X 2462 #undef X
2397 }; 2463 };
2398 } // end of anonymous namespace 2464 } // end of anonymous namespace
2399 2465
2400 void TargetARM32::lowerFcmp(const InstFcmp *Inst) { 2466 void TargetARM32::lowerFcmpCond(const InstFcmp *Instr,
2401 Variable *Dest = Inst->getDest(); 2467 CondARM32::Cond *CondIfTrue0,
2468 CondARM32::Cond *CondIfTrue1,
2469 CondARM32::Cond *CondIfFalse) {
2470 InstFcmp::FCond Condition = Instr->getCondition();
2471 switch (Condition) {
2472 case InstFcmp::False:
2473 *CondIfFalse = CondARM32::AL;
2474 *CondIfTrue0 = *CondIfTrue1 = CondARM32::kNone;
2475 break;
2476 case InstFcmp::True:
2477 *CondIfFalse = *CondIfTrue1 = CondARM32::kNone;
2478 *CondIfTrue0 = CondARM32::AL;
2479 break;
2480 default: {
2481 Variable *Src0R = legalizeToReg(Instr->getSrc(0));
2482 Variable *Src1R = legalizeToReg(Instr->getSrc(1));
2483 _vcmp(Src0R, Src1R);
2484 _vmrs();
2485 assert(Condition < llvm::array_lengthof(TableFcmp));
2486 *CondIfTrue0 = TableFcmp[Condition].CC0;
2487 *CondIfTrue1 = TableFcmp[Condition].CC1;
2488 *CondIfFalse = (*CondIfTrue1 != CondARM32::kNone)
2489 ? CondARM32::AL
2490 : InstARM32::getOppositeCondition(*CondIfTrue0);
2491 }
2492 }
2493 }
2494
2495 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
2496 Variable *Dest = Instr->getDest();
2402 if (isVectorType(Dest->getType())) { 2497 if (isVectorType(Dest->getType())) {
2403 Variable *T = makeReg(Dest->getType()); 2498 Variable *T = makeReg(Dest->getType());
2404 Context.insert(InstFakeDef::create(Func, T)); 2499 Context.insert(InstFakeDef::create(Func, T));
2405 _mov(Dest, T); 2500 _mov(Dest, T);
2406 UnimplementedError(Func->getContext()->getFlags()); 2501 UnimplementedError(Func->getContext()->getFlags());
2407 return; 2502 return;
2408 } 2503 }
2409 2504
2410 Variable *Src0R = legalizeToReg(Inst->getSrc(0));
2411 Variable *Src1R = legalizeToReg(Inst->getSrc(1));
2412 Variable *T = makeReg(IceType_i32); 2505 Variable *T = makeReg(IceType_i32);
2413 _vcmp(Src0R, Src1R); 2506 Operand *_1 = Ctx->getConstantInt32(1);
2414 _mov(T, Ctx->getConstantZero(IceType_i32)); 2507 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2415 _vmrs(); 2508
2416 Operand *One = Ctx->getConstantInt32(1); 2509 CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse;
2417 InstFcmp::FCond Condition = Inst->getCondition(); 2510 lowerFcmpCond(Instr, &CondIfTrue0, &CondIfTrue1, &CondIfFalse);
2418 assert(Condition < llvm::array_lengthof(TableFcmp)); 2511
2419 CondARM32::Cond CC0 = TableFcmp[Condition].CC0; 2512 bool RedefineT = false;
2420 CondARM32::Cond CC1 = TableFcmp[Condition].CC1; 2513 if (CondIfFalse != CondARM32::kNone) {
2421 if (CC0 != CondARM32::kNone) { 2514 assert(!RedefineT);
2422 _mov(T, One, CC0); 2515 _mov(T, _0, CondIfFalse);
2423 // If this mov is not a maybe mov, but an actual mov (i.e., CC0 == AL), we 2516 RedefineT = true;
2424 // don't want to _set_dest_redefined so that liveness + dead-code
2425 // elimination will get rid of the previous assignment (i.e., T = 0) above.
2426 // TODO(stichnot,jpp): We should be able to conditionally create the "T=0"
2427 // instruction based on CC0, instead of relying on DCE to remove it.
2428 if (CC0 != CondARM32::AL)
2429 _set_dest_redefined();
2430 } 2517 }
2431 if (CC1 != CondARM32::kNone) { 2518
2432 assert(CC0 != CondARM32::kNone); 2519 if (CondIfTrue0 != CondARM32::kNone) {
2433 assert(CC1 != CondARM32::AL); 2520 if (RedefineT) {
2434 _mov_redefined(T, One, CC1); 2521 _mov_redefined(T, _1, CondIfTrue0);
2522 } else {
2523 _mov(T, _1, CondIfTrue0);
2524 }
2525 RedefineT = true;
2435 } 2526 }
2527
2528 if (CondIfTrue1 != CondARM32::kNone) {
2529 assert(RedefineT);
2530 _mov_redefined(T, _1, CondIfTrue1);
2531 }
2532
2436 _mov(Dest, T); 2533 _mov(Dest, T);
2437 } 2534 }
2438 2535
2439 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { 2536 void TargetARM32::lowerIcmpCond(const InstIcmp *Inst,
2440 Variable *Dest = Inst->getDest(); 2537 CondARM32::Cond *CondIfTrue,
2538 CondARM32::Cond *CondIfFalse) {
2441 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2539 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
2442 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 2540 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
2443 2541
2444 if (isVectorType(Dest->getType())) {
2445 Variable *T = makeReg(Dest->getType());
2446 Context.insert(InstFakeDef::create(Func, T));
2447 _mov(Dest, T);
2448 UnimplementedError(Func->getContext()->getFlags());
2449 return;
2450 }
2451
2452 // a=icmp cond, b, c ==> 2542 // a=icmp cond, b, c ==>
2453 // GCC does: 2543 // GCC does:
2454 // cmp b.hi, c.hi or cmp b.lo, c.lo 2544 // cmp b.hi, c.hi or cmp b.lo, c.lo
2455 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi 2545 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
2456 // mov.<C1> t, #1 mov.<C1> t, #1 2546 // mov.<C1> t, #1 mov.<C1> t, #1
2457 // mov.<C2> t, #0 mov.<C2> t, #0 2547 // mov.<C2> t, #0 mov.<C2> t, #0
2458 // mov a, t mov a, t 2548 // mov a, t mov a, t
2459 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" 2549 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
2460 // is used for signed compares. In some cases, b and c need to be swapped as 2550 // is used for signed compares. In some cases, b and c need to be swapped as
2461 // well. 2551 // well.
2462 // 2552 //
2463 // LLVM does: 2553 // LLVM does:
2464 // for EQ and NE: 2554 // for EQ and NE:
2465 // eor t1, b.hi, c.hi 2555 // eor t1, b.hi, c.hi
2466 // eor t2, b.lo, c.lo 2556 // eor t2, b.lo, c.lo
2467 // orrs t, t1, t2 2557 // orrs t, t1, t2
2468 // mov.<C> t, #1 2558 // mov.<C> t, #1
2469 // mov a, t 2559 // mov a, t
2470 // 2560 //
2471 // that's nice in that it's just as short but has fewer dependencies for 2561 // that's nice in that it's just as short but has fewer dependencies for
2472 // better ILP at the cost of more registers. 2562 // better ILP at the cost of more registers.
2473 // 2563 //
2474 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two 2564 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
2475 // unconditional mov #0, two cmps, two conditional mov #1, and one 2565 // unconditional mov #0, two cmps, two conditional mov #1, and one
2476 // conditional reg mov. That has few dependencies for good ILP, but is a 2566 // conditional reg mov. That has few dependencies for good ILP, but is a
2477 // longer sequence. 2567 // longer sequence.
2478 // 2568 //
2479 // So, we are going with the GCC version since it's usually better (except 2569 // So, we are going with the GCC version since it's usually better (except
2480 // perhaps for eq/ne). We could revisit special-casing eq/ne later. 2570 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
2481 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2571
2482 Constant *One = Ctx->getConstantInt32(1);
2483 if (Src0->getType() == IceType_i64) { 2572 if (Src0->getType() == IceType_i64) {
2484 InstIcmp::ICond Conditon = Inst->getCondition(); 2573 InstIcmp::ICond Conditon = Inst->getCondition();
2485 size_t Index = static_cast<size_t>(Conditon); 2574 size_t Index = static_cast<size_t>(Conditon);
2486 assert(Index < llvm::array_lengthof(TableIcmp64)); 2575 assert(Index < llvm::array_lengthof(TableIcmp64));
2487 Variable *Src0Lo, *Src0Hi; 2576 Variable *Src0Lo, *Src0Hi;
2488 Operand *Src1LoRF, *Src1HiRF; 2577 Operand *Src1LoRF, *Src1HiRF;
2489 if (TableIcmp64[Index].Swapped) { 2578 if (TableIcmp64[Index].Swapped) {
2490 Src0Lo = legalizeToReg(loOperand(Src1)); 2579 Src0Lo = legalizeToReg(loOperand(Src1));
2491 Src0Hi = legalizeToReg(hiOperand(Src1)); 2580 Src0Hi = legalizeToReg(hiOperand(Src1));
2492 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 2581 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
2493 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); 2582 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
2494 } else { 2583 } else {
2495 Src0Lo = legalizeToReg(loOperand(Src0)); 2584 Src0Lo = legalizeToReg(loOperand(Src0));
2496 Src0Hi = legalizeToReg(hiOperand(Src0)); 2585 Src0Hi = legalizeToReg(hiOperand(Src0));
2497 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); 2586 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
2498 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); 2587 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
2499 } 2588 }
2500 Variable *T = makeReg(IceType_i32);
2501 if (TableIcmp64[Index].IsSigned) { 2589 if (TableIcmp64[Index].IsSigned) {
2502 Variable *ScratchReg = makeReg(IceType_i32); 2590 Variable *ScratchReg = makeReg(IceType_i32);
2503 _cmp(Src0Lo, Src1LoRF); 2591 _cmp(Src0Lo, Src1LoRF);
2504 _sbcs(ScratchReg, Src0Hi, Src1HiRF); 2592 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
2505 // ScratchReg isn't going to be used, but we need the side-effect of 2593 // ScratchReg isn't going to be used, but we need the side-effect of
2506 // setting flags from this operation. 2594 // setting flags from this operation.
2507 Context.insert(InstFakeUse::create(Func, ScratchReg)); 2595 Context.insert(InstFakeUse::create(Func, ScratchReg));
2508 } else { 2596 } else {
2509 _cmp(Src0Hi, Src1HiRF); 2597 _cmp(Src0Hi, Src1HiRF);
2510 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); 2598 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
2511 } 2599 }
2512 _mov(T, One, TableIcmp64[Index].C1); 2600 *CondIfTrue = TableIcmp64[Index].C1;
2513 _mov_redefined(T, Zero, TableIcmp64[Index].C2); 2601 *CondIfFalse = TableIcmp64[Index].C2;
2514 _mov(Dest, T);
2515 return; 2602 return;
2516 } 2603 }
2517 2604
2518 // a=icmp cond b, c ==> 2605 // a=icmp cond b, c ==>
2519 // GCC does: 2606 // GCC does:
2520 // <u/s>xtb tb, b 2607 // <u/s>xtb tb, b
2521 // <u/s>xtb tc, c 2608 // <u/s>xtb tc, c
2522 // cmp tb, tc 2609 // cmp tb, tc
2523 // mov.C1 t, #0 2610 // mov.C1 t, #0
2524 // mov.C2 t, #1 2611 // mov.C2 t, #1
(...skipping 16 matching lines...) Expand all
2541 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For 2628 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
2542 // the unsigned case, for some reason it does similar to GCC and does a uxtb 2629 // the unsigned case, for some reason it does similar to GCC and does a uxtb
2543 // first. It's not clear to me why that special-casing is needed. 2630 // first. It's not clear to me why that special-casing is needed.
2544 // 2631 //
2545 // We'll go with the LLVM way for now, since it's shorter and has just as few 2632 // We'll go with the LLVM way for now, since it's shorter and has just as few
2546 // dependencies. 2633 // dependencies.
2547 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); 2634 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
2548 assert(ShiftAmt >= 0); 2635 assert(ShiftAmt >= 0);
2549 Constant *ShiftConst = nullptr; 2636 Constant *ShiftConst = nullptr;
2550 Variable *Src0R = nullptr; 2637 Variable *Src0R = nullptr;
2551 Variable *T = makeReg(IceType_i32);
2552 if (ShiftAmt) { 2638 if (ShiftAmt) {
2553 ShiftConst = Ctx->getConstantInt32(ShiftAmt); 2639 ShiftConst = Ctx->getConstantInt32(ShiftAmt);
2554 Src0R = makeReg(IceType_i32); 2640 Src0R = makeReg(IceType_i32);
2555 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); 2641 _lsl(Src0R, legalizeToReg(Src0), ShiftConst);
2556 } else { 2642 } else {
2557 Src0R = legalizeToReg(Src0); 2643 Src0R = legalizeToReg(Src0);
2558 } 2644 }
2559 _mov(T, Zero);
2560 if (ShiftAmt) { 2645 if (ShiftAmt) {
2561 Variable *Src1R = legalizeToReg(Src1); 2646 Variable *Src1R = legalizeToReg(Src1);
2562 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( 2647 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
2563 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); 2648 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
2564 _cmp(Src0R, Src1RShifted); 2649 _cmp(Src0R, Src1RShifted);
2565 } else { 2650 } else {
2566 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); 2651 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2567 _cmp(Src0R, Src1RF); 2652 _cmp(Src0R, Src1RF);
2568 } 2653 }
2569 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); 2654 *CondIfTrue = getIcmp32Mapping(Inst->getCondition());
2655 *CondIfFalse = InstARM32::getOppositeCondition(*CondIfTrue);
2656 }
2657
2658 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
2659 Variable *Dest = Inst->getDest();
2660
2661 if (isVectorType(Dest->getType())) {
2662 Variable *T = makeReg(Dest->getType());
2663 Context.insert(InstFakeDef::create(Func, T));
2664 _mov(Dest, T);
2665 UnimplementedError(Func->getContext()->getFlags());
2666 return;
2667 }
2668
2669 Constant *_0 = Ctx->getConstantZero(IceType_i32);
2670 Constant *_1 = Ctx->getConstantInt32(1);
2671 Variable *T = makeReg(IceType_i32);
2672
2673 CondARM32::Cond CondIfTrue, CondIfFalse;
2674 lowerIcmpCond(Inst, &CondIfTrue, &CondIfFalse);
2675
2676 _mov(T, _0, CondIfFalse);
2677 _mov_redefined(T, _1, CondIfTrue);
2570 _mov(Dest, T); 2678 _mov(Dest, T);
2679
2571 return; 2680 return;
2572 } 2681 }
2573 2682
2574 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { 2683 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
2575 (void)Inst; 2684 (void)Inst;
2576 UnimplementedError(Func->getContext()->getFlags()); 2685 UnimplementedError(Func->getContext()->getFlags());
2577 } 2686 }
2578 2687
2579 namespace { 2688 namespace {
2580 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { 2689 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
(...skipping 741 matching lines...) Expand 10 before | Expand all | Expand 10 after
3322 Operand *SrcF = Inst->getFalseOperand(); 3431 Operand *SrcF = Inst->getFalseOperand();
3323 Operand *Condition = Inst->getCondition(); 3432 Operand *Condition = Inst->getCondition();
3324 3433
3325 if (isVectorType(DestTy)) { 3434 if (isVectorType(DestTy)) {
3326 Variable *T = makeReg(DestTy); 3435 Variable *T = makeReg(DestTy);
3327 Context.insert(InstFakeDef::create(Func, T)); 3436 Context.insert(InstFakeDef::create(Func, T));
3328 _mov(Dest, T); 3437 _mov(Dest, T);
3329 UnimplementedError(Func->getContext()->getFlags()); 3438 UnimplementedError(Func->getContext()->getFlags());
3330 return; 3439 return;
3331 } 3440 }
3332 // TODO(jvoung): handle folding opportunities. 3441
3333 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t 3442 CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse;
3334 Variable *CmpOpnd0 = legalizeToReg(Condition); 3443 if (!_mov_i1_to_flags(Condition, &CondIfTrue0, &CondIfTrue1, &CondIfFalse)) {
3335 Type CmpOpnd0Ty = CmpOpnd0->getType(); 3444 // "Condition" was not folded.
3336 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32); 3445 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
3337 assert(CmpOpnd0Ty == IceType_i1); 3446 Variable *CmpOpnd0 = legalizeToReg(Condition);
3338 if (CmpOpnd0Ty != IceType_i32) 3447 Type CmpOpnd0Ty = CmpOpnd0->getType();
3339 _uxt(CmpOpnd0, CmpOpnd0); 3448 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
3340 _cmp(CmpOpnd0, CmpOpnd1); 3449 assert(CmpOpnd0Ty == IceType_i1);
3341 static constexpr CondARM32::Cond Cond = CondARM32::NE; 3450 if (CmpOpnd0Ty != IceType_i32)
3451 _uxt(CmpOpnd0, CmpOpnd0);
3452 _cmp(CmpOpnd0, CmpOpnd1);
3453 CondIfTrue0 = CondARM32::NE;
3454 CondIfTrue1 = CondARM32::kNone;
3455 CondIfFalse = CondARM32::EQ;
3456 }
3457
3342 if (DestTy == IceType_i64) { 3458 if (DestTy == IceType_i64) {
3343 SrcT = legalizeUndef(SrcT); 3459 SrcT = legalizeUndef(SrcT);
3344 SrcF = legalizeUndef(SrcF); 3460 SrcF = legalizeUndef(SrcF);
3345 // Set the low portion. 3461 // Set the low portion.
3346 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3462 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3463 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
3347 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex); 3464 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
3348 Variable *TLo = makeReg(SrcFLo->getType()); 3465 Variable *TLo = makeReg(SrcFLo->getType());
3349 _mov(TLo, SrcFLo); 3466 bool RedefineTLo = false;
3350 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); 3467 if (CondIfFalse != CondARM32::kNone) {
3351 _mov_redefined(TLo, SrcTLo, Cond); 3468 _mov(TLo, SrcFLo, CondIfFalse);
3469 RedefineTLo = true;
3470 }
3471 if (CondIfTrue0 != CondARM32::kNone) {
3472 if (!RedefineTLo)
3473 _mov(TLo, SrcTLo, CondIfTrue0);
3474 else
3475 _mov_redefined(TLo, SrcTLo, CondIfTrue0);
3476 RedefineTLo = true;
3477 }
3478 if (CondIfTrue1 != CondARM32::kNone) {
3479 assert(RedefineTLo);
3480 _mov_redefined(TLo, SrcTLo, CondIfTrue1);
3481 }
3352 _mov(DestLo, TLo); 3482 _mov(DestLo, TLo);
3483
3353 // Set the high portion. 3484 // Set the high portion.
3354 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3485 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3486 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
3355 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex); 3487 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
3356 Variable *THi = makeReg(SrcFHi->getType()); 3488 Variable *THi = makeReg(SrcFHi->getType());
3357 _mov(THi, SrcFHi); 3489 bool RedefineTHi = false;
3358 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); 3490 if (CondIfFalse != CondARM32::kNone) {
3359 _mov_redefined(THi, SrcTHi, Cond); 3491 _mov(THi, SrcFHi, CondIfFalse);
3492 RedefineTHi = true;
3493 }
3494 if (CondIfTrue0 != CondARM32::kNone) {
3495 if (!RedefineTHi)
3496 _mov(THi, SrcTHi, CondIfTrue0);
3497 else
3498 _mov_redefined(THi, SrcTHi, CondIfTrue0);
3499 RedefineTHi = true;
3500 }
3501 if (CondIfTrue1 != CondARM32::kNone) {
3502 assert(RedefineTHi);
3503 _mov_redefined(THi, SrcTHi, CondIfTrue1);
3504 }
3360 _mov(DestHi, THi); 3505 _mov(DestHi, THi);
3361 return; 3506 return;
3362 } 3507 }
3363 3508
3364 if (isFloatingType(DestTy)) { 3509 if (isFloatingType(DestTy)) {
3510 SrcT = legalizeToReg(SrcT);
3511 SrcF = legalizeToReg(SrcF);
3365 Variable *T = makeReg(DestTy); 3512 Variable *T = makeReg(DestTy);
3366 SrcF = legalizeToReg(SrcF);
3367 assert(DestTy == SrcF->getType()); 3513 assert(DestTy == SrcF->getType());
3368 _mov(T, SrcF); 3514 bool RedefineT = false;
3369 SrcT = legalizeToReg(SrcT); 3515 if (CondIfFalse != CondARM32::kNone) {
3516 _mov(T, SrcF, CondIfFalse);
3517 RedefineT = true;
3518 }
3519 if (CondIfTrue0 != CondARM32::kNone) {
3520 if (!RedefineT)
3521 _mov(T, SrcT, CondIfTrue0);
3522 else
3523 _mov_redefined(T, SrcT, CondIfTrue0);
3524 RedefineT = true;
3525 }
3526 if (CondIfTrue1 != CondARM32::kNone) {
3527 assert(RedefineT);
3528 _mov_redefined(T, SrcT, CondIfTrue1);
3529 }
3370 assert(DestTy == SrcT->getType()); 3530 assert(DestTy == SrcT->getType());
3371 _mov(T, SrcT, Cond);
3372 _set_dest_redefined();
3373 _mov(Dest, T); 3531 _mov(Dest, T);
3374 return; 3532 return;
3375 } 3533 }
3376 3534
3535 Variable *T = makeReg(SrcF->getType());
3536 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
3377 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); 3537 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
3378 Variable *T = makeReg(SrcF->getType()); 3538 bool RedefineT = false;
3379 _mov(T, SrcF); 3539 if (CondIfFalse != CondARM32::kNone) {
3380 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex); 3540 _mov(T, SrcF, CondIfFalse);
3381 _mov_redefined(T, SrcT, Cond); 3541 RedefineT = true;
3542 }
3543 if (CondIfTrue0 != CondARM32::kNone) {
3544 if (!RedefineT)
3545 _mov(T, SrcT, CondIfTrue0);
3546 else
3547 _mov_redefined(T, SrcT, CondIfTrue0);
3548 RedefineT = true;
3549 }
3550 if (CondIfTrue1 != CondARM32::kNone) {
3551 assert(RedefineT);
3552 _mov_redefined(T, SrcT, CondIfTrue1);
3553 }
3382 _mov(Dest, T); 3554 _mov(Dest, T);
3383 } 3555 }
3384 3556
3385 void TargetARM32::lowerStore(const InstStore *Inst) { 3557 void TargetARM32::lowerStore(const InstStore *Inst) {
3386 Operand *Value = Inst->getData(); 3558 Operand *Value = Inst->getData();
3387 Operand *Addr = Inst->getAddr(); 3559 Operand *Addr = Inst->getAddr();
3388 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); 3560 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
3389 Type Ty = NewAddr->getType(); 3561 Type Ty = NewAddr->getType();
3390 3562
3391 if (Ty == IceType_i64) { 3563 if (Ty == IceType_i64) {
(...skipping 387 matching lines...) Expand 10 before | Expand all | Expand 10 after
3779 3951
/// Emission of a ConstantDouble is not supported by this target yet: the
/// argument is deliberately ignored and the call is routed to
/// UnimplementedError with the context's flags.
3780 void TargetARM32::emit(const ConstantDouble *C) const { 3952 void TargetARM32::emit(const ConstantDouble *C) const {
3781 (void)C; 3953 (void)C;
3782 UnimplementedError(Ctx->getFlags()); 3954 UnimplementedError(Ctx->getFlags());
3783 } 3955 }
3784 3956
/// An undef constant reaching the emitter is treated as an internal error and
/// is reported through llvm::report_fatal_error.
3785 void TargetARM32::emit(const ConstantUndef *) const { 3957 void TargetARM32::emit(const ConstantUndef *) const {
3786 llvm::report_fatal_error("undef value encountered by emitter."); 3958 llvm::report_fatal_error("undef value encountered by emitter.");
3787 } 3959 }
3788 3960
/// Turns a truncation source into a flags-based condition instead of
/// materializing a boolean value: the source is legalized into a register and
/// tested against the constant 1.
///
/// For an i64 source only the low half (loOperand) is legalized and tested.
///
/// \param Src         the operand whose truncated value is wanted as a
///                    condition.
/// \param CondIfTrue  [out] always set to NE.
/// \param CondIfFalse [out] always set to EQ.
3961 void TargetARM32::lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue,
3962 CondARM32::Cond *CondIfFalse) {
3963 Operand *_1 = Ctx->getConstantInt32(1);
3964 Variable *SrcR =
3965 legalizeToReg(Src->getType() == IceType_i64 ? loOperand(Src) : Src);
// Test against 1 so that only the least significant bit decides the outcome.
3966 _tst(SrcR, _1);
3967 *CondIfTrue = CondARM32::NE; // NE <-> APSR.Z == 0
3968 *CondIfFalse = CondARM32::EQ; // EQ <-> APSR.Z == 1
3969 }
3970
/// Tries to fold the computation of \p Boolean into flag-setting code so that
/// a consumer can predicate on condition codes rather than on a materialized
/// i1 value.
///
/// The outputs are first reset to CondIfTrue0 = kNone, CondIfTrue1 = kNone and
/// CondIfFalse = AL. If BoolComputations knows a producer for \p Boolean, the
/// producer is lowered by kind:
///   - InstIcmp -> lowerIcmpCond (fills CondIfTrue0 and CondIfFalse);
///   - InstFcmp -> lowerFcmpCond (fills CondIfTrue0, CondIfTrue1 and
///     CondIfFalse);
///   - InstCast -> lowerTruncToFlags on the cast's first source (fills
///     CondIfTrue0 and CondIfFalse); the cast kind is asserted to be Trunc.
///
/// \param Boolean     the operand used as a condition by the caller.
/// \param CondIfTrue0 [out] first condition under which Boolean is true.
/// \param CondIfTrue1 [out] optional second "true" condition; only the Fcmp
///                    path is handed this pointer.
/// \param CondIfFalse [out] condition under which Boolean is false.
/// \return true if a producer was found and lowered to flags; false otherwise,
///         in which case the outputs hold the reset values above and the
///         caller has to set the flags itself.
3971 bool TargetARM32::_mov_i1_to_flags(Operand *Boolean,
3972 CondARM32::Cond *CondIfTrue0,
3973 CondARM32::Cond *CondIfTrue1,
3974 CondARM32::Cond *CondIfFalse) {
3975 *CondIfTrue0 = CondARM32::kNone;
3976 *CondIfTrue1 = CondARM32::kNone;
3977 *CondIfFalse = CondARM32::AL;
3978 bool FoldOK = false;
3979 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) {
3980 if (const auto *IcmpProducer = llvm::dyn_cast<InstIcmp>(Producer)) {
3981 lowerIcmpCond(IcmpProducer, CondIfTrue0, CondIfFalse);
3982 FoldOK = true;
3983 } else if (const auto *FcmpProducer = llvm::dyn_cast<InstFcmp>(Producer)) {
3984 lowerFcmpCond(FcmpProducer, CondIfTrue0, CondIfTrue1, CondIfFalse);
3985 FoldOK = true;
3986 } else if (const auto *CastProducer = llvm::dyn_cast<InstCast>(Producer)) {
// NOTE(review): the only cast kind whitelisted by
// BoolFolding::shouldTrackProducer is Trunc, which is what this assert relies
// on; keep the two in sync.
3987 assert(CastProducer->getCastKind() == InstCast::Trunc);
3988 lowerTruncToFlags(CastProducer->getSrc(0), CondIfTrue0, CondIfFalse);
3989 FoldOK = true;
3990 }
3991 }
3992 return FoldOK;
3993 }
3994
// File-local predicates that decide which instructions take part in bool
// folding: which ones may produce a foldable i1 value and which ones may
// consume it.
3995 namespace {
3996 namespace BoolFolding {
/// Returns true if \p Instr is a kind of instruction whose result may be
/// tracked as a foldable boolean producer: any Icmp, any Fcmp, or a Cast whose
/// cast kind is Trunc. Everything else yields false.
// NOTE(review): the switch operands are cast to uint32_t, presumably so the
// compiler does not warn about enumerators that are not handled -- confirm.
3997 bool shouldTrackProducer(const Inst &Instr) {
3998 switch (static_cast<uint32_t>(Instr.getKind())) {
3999 case Inst::Icmp:
4000 return true;
4001 case Inst::Fcmp:
4002 return true;
4003 }
4004 if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) {
4005 switch (static_cast<uint32_t>(Cast->getCastKind())) {
4006 case InstCast::Trunc:
4007 return true;
4008 }
4009 }
4010 return false;
4011 }
4012
/// Returns true if \p Instr may consume a folded boolean: a Br always
/// qualifies; a Select, or a Cast of kind Sext or Zext, qualifies only when
/// its destination is not of vector type. Everything else yields false.
4013 bool isValidConsumer(const Inst &Instr) {
4014 switch (static_cast<uint32_t>(Instr.getKind())) {
4015 case Inst::Br:
4016 return true;
4017 case Inst::Select:
4018 return !isVectorType(Instr.getDest()->getType());
4019 }
4020 if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) {
4021 switch (static_cast<uint32_t>(Cast->getCastKind())) {
4022 case InstCast::Sext:
4023 return !isVectorType(Instr.getDest()->getType());
4024 case InstCast::Zext:
4025 return !isVectorType(Instr.getDest()->getType());
4026 }
4027 }
4028 return false;
4029 }
4030 } // end of namespace BoolFolding
4031 } // end of anonymous namespace
4032
/// Scans the instructions of \p Node and fills KnownComputations with the
/// boolean producers that can be folded into their consumers.
///
/// Per instruction, in order:
///   1. If it is not deleted, has a destination of type i1, and is accepted by
///      BoolFolding::shouldTrackProducer, it is recorded under its
///      destination's variable index.
///   2. Each source variable that names a recorded producer is checked: if the
///      variable is not operand 0 of the instruction, or the instruction is
///      not accepted by BoolFolding::isValidConsumer, the producer is dropped
///      from the map. Otherwise, if this is the last use of the variable, the
///      entry's IsLiveOut flag is cleared.
///
/// After the scan, every entry whose IsLiveOut flag is still set is removed,
/// and the producing instruction of every remaining entry is marked dead.
// NOTE(review): this relies on BoolComputationEntry starting out with
// IsLiveOut set; the entry type is declared outside this chunk -- confirm.
4033 void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) {
4034 for (Inst &Instr : Node->getInsts()) {
4035 // Check whether Instr is a valid producer.
4036 Variable *Dest = Instr.getDest();
4037 if (!Instr.isDeleted() // only consider non-deleted instructions; and
4038 && Dest // only instructions with an actual dest var; and
4039 && Dest->getType() == IceType_i1 // only bool-type dest vars; and
4040 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
4041 KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr));
4042 }
4043 // Check each src variable against the map.
4044 FOREACH_VAR_IN_INST(Var, Instr) {
4045 SizeT VarNum = Var->getIndex();
4046 auto ComputationIter = KnownComputations.find(VarNum);
4047 if (ComputationIter == KnownComputations.end()) {
4048 continue;
4049 }
4050
// A tracked boolean used anywhere other than operand 0, or by an instruction
// that cannot consume flags, disqualifies its producer from folding.
4051 if (IndexOfVarOperandInInst(Var) != 0 ||
4052 !BoolFolding::isValidConsumer(Instr)) {
4053 // All valid consumers use Var as the first source operand
4054 KnownComputations.erase(VarNum);
4055 continue;
4056 }
4057
// Seeing the last use inside this node means the value does not need to
// outlive it.
4058 if (Instr.isLastUse(Var)) {
4059 ComputationIter->second.IsLiveOut = false;
4060 }
4061 }
4062 }
4063
// Second pass: drop entries that may still be live after this node and mark
// the surviving producers dead. erase() returns the next iterator, so the
// loop advances manually only when nothing was erased.
4064 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
4065 Iter != End;) {
4066 // Disable the folding if its dest may be live beyond this block.
4067 if (Iter->second.IsLiveOut) {
4068 Iter = KnownComputations.erase(Iter);
4069 continue;
4070 }
4071
4072 // Mark as "dead" rather than outright deleting. This is so that other
4073 // peephole style optimizations during or before lowering have access to
4074 // this instruction in undeleted form. See for example
4075 // tryOptimizedCmpxchgCmpBr().
4076 Iter->second.Instr->setDead();
4077 ++Iter;
4078 }
4079 }
4080
/// Constructs the ARM32 data-lowering object; the only work done is forwarding
/// \p Ctx to the TargetDataLowering base class.
3789 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx) 4081 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
3790 : TargetDataLowering(Ctx) {} 4082 : TargetDataLowering(Ctx) {}
3791 4083
3792 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars, 4084 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
3793 const IceString &SectionSuffix) { 4085 const IceString &SectionSuffix) {
3794 switch (Ctx->getFlags().getOutFileType()) { 4086 switch (Ctx->getFlags().getOutFileType()) {
3795 case FT_Elf: { 4087 case FT_Elf: {
3796 ELFObjectWriter *Writer = Ctx->getObjectWriter(); 4088 ELFObjectWriter *Writer = Ctx->getObjectWriter();
3797 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix); 4089 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
3798 } break; 4090 } break;
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
3970 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; 4262 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
3971 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 4263 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
3972 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; 4264 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
3973 } 4265 }
3974 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 4266 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
3975 // However, for compatibility with current NaCl LLVM, don't claim that. 4267 // However, for compatibility with current NaCl LLVM, don't claim that.
3976 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 4268 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
3977 } 4269 }
3978 4270
3979 } // end of namespace Ice 4271 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/assembler/arm32/branch-mult-fwd.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698