src/IceTargetLoweringARM32.cpp - Issue 1414883007: Subzero. ARM32. Implements bool folding.

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1414883007: Subzero. ARM32. Implements bool folding. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Addresses comments && pulls. Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//	1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

11 /// This file implements the TargetLoweringARM32 class, which consists almost	11 /// This file implements the TargetLoweringARM32 class, which consists almost

12 /// entirely of the lowering sequence for each high-level instruction.	12 /// entirely of the lowering sequence for each high-level instruction.

13 ///	13 ///

14 //===----------------------------------------------------------------------===//	14 //===----------------------------------------------------------------------===//

15 #include "IceTargetLoweringARM32.h"	15 #include "IceTargetLoweringARM32.h"

16	16

17 #include "IceCfg.h"	17 #include "IceCfg.h"

18 #include "IceCfgNode.h"	18 #include "IceCfgNode.h"

19 #include "IceClFlags.h"	19 #include "IceClFlags.h"

20 #include "IceDefs.h"	20 #include "IceDefs.h"

21 #include "IceELFObjectWriter.h"	21 #include "IceELFObjectWriter.h"

22 #include "IceGlobalInits.h"	22 #include "IceGlobalInits.h"

23 #include "IceInstARM32.def"	23 #include "IceInstARM32.def"

24 #include "IceInstARM32.h"	24 #include "IceInstARM32.h"

	25 #include "IceInstVarIter.h"

25 #include "IceLiveness.h"	26 #include "IceLiveness.h"

26 #include "IceOperand.h"	27 #include "IceOperand.h"

27 #include "IcePhiLoweringImpl.h"	28 #include "IcePhiLoweringImpl.h"

28 #include "IceRegistersARM32.h"	29 #include "IceRegistersARM32.h"

29 #include "IceTargetLoweringARM32.def"	30 #include "IceTargetLoweringARM32.def"

30 #include "IceUtils.h"	31 #include "IceUtils.h"

31 #include "llvm/Support/MathExtras.h"	32 #include "llvm/Support/MathExtras.h"

32	33

33 #include <algorithm>	34 #include <algorithm>

34 #include <utility>	35 #include <utility>

(...skipping 1761 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1796 _mov(Dest, SrcR);	1797 _mov(Dest, SrcR);

1797 } else if (isFloatingType(Dest->getType())) {	1798 } else if (isFloatingType(Dest->getType())) {

1798 Variable *SrcR = legalizeToReg(NewSrc);	1799 Variable *SrcR = legalizeToReg(NewSrc);

1799 _mov(Dest, SrcR);	1800 _mov(Dest, SrcR);

1800 } else {	1801 } else {

1801 _mov(Dest, NewSrc);	1802 _mov(Dest, NewSrc);

1802 }	1803 }

1803 }	1804 }

1804 }	1805 }

1805	1806

1806 void TargetARM32::lowerBr(const InstBr *Inst) {	1807 void TargetARM32::lowerBr(const InstBr *Instr) {

1807 if (Inst->isUnconditional()) {	1808 if (Instr->isUnconditional()) {

1808 _br(Inst->getTargetUnconditional());	1809 _br(Instr->getTargetUnconditional());

1809 return;	1810 return;

1810 }	1811 }

1811 Operand *Cond = Inst->getCondition();	1812 Operand *Cond = Instr->getCondition();

1812 // TODO(jvoung): Handle folding opportunities.

1813	1813

1814 Type Ty = Cond->getType();	1814 CondARM32::Cond BrCondTrue0 = CondARM32::NE;

1815 Variable *Src0R = legalizeToReg(Cond);	1815 CondARM32::Cond BrCondTrue1 = CondARM32::kNone;

1816 assert(Ty == IceType_i1);	1816 CondARM32::Cond BrCondFalse = CondARM32::kNone;

1817 if (Ty != IceType_i32)	1817 if (!_mov_i1_to_flags(Cond, &BrCondTrue0, &BrCondTrue1, &BrCondFalse)) {

1818 _uxt(Src0R, Src0R);	1818 // "Cond" was not fold.

1819 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1819 Type Ty = Cond->getType();

1820 _cmp(Src0R, Zero);	1820 Variable *Src0R = legalizeToReg(Cond);

1821 _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);	1821 assert(Ty == IceType_i1);

	1822 if (Ty != IceType_i32)

	1823 _uxt(Src0R, Src0R);

	1824 Constant *_0 = Ctx->getConstantZero(IceType_i32);

	1825 _cmp(Src0R, _0);

	1826 BrCondTrue0 = CondARM32::NE;

	1827 }

	1828

	1829 if (BrCondTrue1 != CondARM32::kNone) {

	1830 _br(Instr->getTargetTrue(), BrCondTrue1);

	1831 }

	1832

	1833 if (BrCondTrue0 == CondARM32::kNone) {

	1834 assert(BrCondTrue1 == CondARM32::kNone);

	1835 _br(Instr->getTargetFalse());

	1836 return;

	1837 }

	1838

	1839 if (BrCondTrue0 == CondARM32::AL) {

	1840 assert(BrCondTrue1 == CondARM32::kNone);

	1841 assert(BrCondFalse == CondARM32::kNone);

	1842 _br(Instr->getTargetTrue());

	1843 return;

	1844 }

	1845

	1846 _br(Instr->getTargetTrue(), Instr->getTargetFalse(), BrCondTrue0);

1822 }	1847 }

1823	1848

1824 void TargetARM32::lowerCall(const InstCall *Instr) {	1849 void TargetARM32::lowerCall(const InstCall *Instr) {

1825 MaybeLeafFunc = false;	1850 MaybeLeafFunc = false;

1826 NeedsStackAlignment = true;	1851 NeedsStackAlignment = true;

1827	1852

1828 // Assign arguments to registers and stack. Also reserve stack.	1853 // Assign arguments to registers and stack. Also reserve stack.

1829 TargetARM32::CallingConv CC;	1854 TargetARM32::CallingConv CC;

1830 // Pair of Arg Operand -> GPR number assignments.	1855 // Pair of Arg Operand -> GPR number assignments.

1831 llvm::SmallVector<std::pair<Operand *, int32_t>,	1856 llvm::SmallVector<std::pair<Operand *, int32_t>,

(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2043 UnimplementedError(Func->getContext()->getFlags());	2068 UnimplementedError(Func->getContext()->getFlags());

2044 } else if (Dest->getType() == IceType_i64) {	2069 } else if (Dest->getType() == IceType_i64) {

2045 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2	2070 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2

2046 Constant *ShiftAmt = Ctx->getConstantInt32(31);	2071 Constant *ShiftAmt = Ctx->getConstantInt32(31);

2047 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	2072 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

2048 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	2073 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

2049 Variable *T_Lo = makeReg(DestLo->getType());	2074 Variable *T_Lo = makeReg(DestLo->getType());

2050 if (Src0->getType() == IceType_i32) {	2075 if (Src0->getType() == IceType_i32) {

2051 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);	2076 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);

2052 _mov(T_Lo, Src0RF);	2077 _mov(T_Lo, Src0RF);

2053 } else if (Src0->getType() == IceType_i1) {	2078 } else if (Src0->getType() != IceType_i1) {

2054 Variable *Src0R = legalizeToReg(Src0);

2055 _lsl(T_Lo, Src0R, ShiftAmt);

2056 _asr(T_Lo, T_Lo, ShiftAmt);

2057 } else {

2058 Variable *Src0R = legalizeToReg(Src0);	2079 Variable *Src0R = legalizeToReg(Src0);

2059 _sxt(T_Lo, Src0R);	2080 _sxt(T_Lo, Src0R);

	2081 } else {

	2082 CondARM32::Cond CondTrue0, CondTrue1, CondFalse;

	2083 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {

	2084 // Handle bool folding.

	2085 Constant *_0 = Ctx->getConstantZero(IceType_i32);

	2086 Operand *_m1 =

	2087 legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex);

	2088 _cmov(T_Lo, _m1, CondTrue0, CondTrue1, _0, CondFalse);

	2089 } else {

	2090 Variable *Src0R = legalizeToReg(Src0);

	2091 _lsl(T_Lo, Src0R, ShiftAmt);

	2092 _asr(T_Lo, T_Lo, ShiftAmt);

	2093 }

2060 }	2094 }

2061 _mov(DestLo, T_Lo);	2095 _mov(DestLo, T_Lo);

2062 Variable *T_Hi = makeReg(DestHi->getType());	2096 Variable *T_Hi = makeReg(DestHi->getType());

2063 if (Src0->getType() != IceType_i1) {	2097 if (Src0->getType() != IceType_i1) {

2064 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,	2098 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,

2065 OperandARM32::ASR, ShiftAmt));	2099 OperandARM32::ASR, ShiftAmt));

2066 } else {	2100 } else {

2067 // For i1, the asr instruction is already done above.	2101 // For i1, the asr instruction is already done above.

2068 _mov(T_Hi, T_Lo);	2102 _mov(T_Hi, T_Lo);

2069 }	2103 }

2070 _mov(DestHi, T_Hi);	2104 _mov(DestHi, T_Hi);

2071 } else if (Src0->getType() == IceType_i1) {	2105 } else if (Src0->getType() != IceType_i1) {

2072 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.

2073 // lsl t1, src_reg, 31

2074 // asr t1, t1, 31

2075 // dst = t1

2076 Variable *Src0R = legalizeToReg(Src0);

2077 Constant *ShiftAmt = Ctx->getConstantInt32(31);

2078 Variable *T = makeReg(Dest->getType());

2079 _lsl(T, Src0R, ShiftAmt);

2080 _asr(T, T, ShiftAmt);

2081 _mov(Dest, T);

2082 } else {

2083 // t1 = sxt src; dst = t1	2106 // t1 = sxt src; dst = t1

2084 Variable *Src0R = legalizeToReg(Src0);	2107 Variable *Src0R = legalizeToReg(Src0);

2085 Variable *T = makeReg(Dest->getType());	2108 Variable *T = makeReg(Dest->getType());

2086 _sxt(T, Src0R);	2109 _sxt(T, Src0R);

2087 _mov(Dest, T);	2110 _mov(Dest, T);

	2111 } else {

	2112 Variable *T = makeReg(Dest->getType());

	2113 CondARM32::Cond CondTrue0, CondTrue1, CondFalse;

	2114 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {

	2115 // Handle bool folding.

	2116 Constant *_0 = Ctx->getConstantZero(IceType_i32);

	2117 Operand *_m1 =

	2118 legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex);

	2119 _cmov(T, _m1, CondTrue0, CondTrue1, _0, CondFalse);

	2120 } else {

	2121 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.

	2122 // lsl t1, src_reg, 31

	2123 // asr t1, t1, 31

	2124 // dst = t1

	2125 Variable *Src0R = legalizeToReg(Src0);

	2126 Constant *ShiftAmt = Ctx->getConstantInt32(31);

	2127 _lsl(T, Src0R, ShiftAmt);

	2128 _asr(T, T, ShiftAmt);

	2129 }

	2130 _mov(Dest, T);

2088 }	2131 }

2089 break;	2132 break;

2090 }	2133 }

2091 case InstCast::Zext: {	2134 case InstCast::Zext: {

2092 if (isVectorType(Dest->getType())) {	2135 if (isVectorType(Dest->getType())) {

2093 Variable *T = makeReg(Dest->getType());	2136 Variable *T = makeReg(Dest->getType());

2094 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));	2137 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));

2095 _mov(Dest, T);	2138 _mov(Dest, T);

2096 UnimplementedError(Func->getContext()->getFlags());	2139 UnimplementedError(Func->getContext()->getFlags());

2097 } else if (Dest->getType() == IceType_i64) {	2140 } else if (Dest->getType() == IceType_i64) {

2098 // t1=uxtb src; dst.lo=t1; dst.hi=0	2141 // t1=uxtb src; dst.lo=t1; dst.hi=0

2099 Constant *Zero = Ctx->getConstantZero(IceType_i32);	2142 Constant *_0 = Ctx->getConstantZero(IceType_i32);

	2143 Constant *_1 = Ctx->getConstantInt32(1);

2100 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	2144 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

2101 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	2145 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

2102 Variable *T_Lo = makeReg(DestLo->getType());	2146 Variable *T_Lo = makeReg(DestLo->getType());

	2147

	2148 CondARM32::Cond CondTrue0, CondTrue1, CondFalse;

	2149 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {

	2150 // Handle folding opportunities.

	2151 Variable *T_Hi = makeReg(DestLo->getType());

	2152 _mov(T_Hi, _0);

	2153 _mov(DestHi, T_Hi);

	2154 _cmov(T_Lo, _1, CondTrue0, CondTrue1, _0, CondFalse);

	2155 _mov(DestLo, T_Lo);

	2156 return;

	2157 }

	2158

2103 // i32 and i1 can just take up the whole register. i32 doesn't need uxt,	2159 // i32 and i1 can just take up the whole register. i32 doesn't need uxt,

2104 // while i1 will have an and mask later anyway.	2160 // while i1 will have an and mask later anyway.

2105 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {	2161 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {

2106 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);	2162 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);

2107 _mov(T_Lo, Src0RF);	2163 _mov(T_Lo, Src0RF);

2108 } else {	2164 } else {

2109 Variable *Src0R = legalizeToReg(Src0);	2165 Variable *Src0R = legalizeToReg(Src0);

2110 _uxt(T_Lo, Src0R);	2166 _uxt(T_Lo, Src0R);

2111 }	2167 }

2112 if (Src0->getType() == IceType_i1) {	2168 if (Src0->getType() == IceType_i1) {

2113 Constant *One = Ctx->getConstantInt32(1);	2169 Constant *One = Ctx->getConstantInt32(1);

2114 _and(T_Lo, T_Lo, One);	2170 _and(T_Lo, T_Lo, One);

2115 }	2171 }

2116 _mov(DestLo, T_Lo);	2172 _mov(DestLo, T_Lo);

2117 Variable *T_Hi = makeReg(DestLo->getType());	2173 Variable *T_Hi = makeReg(DestLo->getType());

2118 _mov(T_Hi, Zero);	2174 _mov(T_Hi, _0);

2119 _mov(DestHi, T_Hi);	2175 _mov(DestHi, T_Hi);

2120 } else if (Src0->getType() == IceType_i1) {	2176 } else if (Src0->getType() == IceType_i1) {

	2177 Constant *_1 = Ctx->getConstantInt32(1);

	2178 Variable *T = makeReg(Dest->getType());

	2179

	2180 CondARM32::Cond CondTrue0, CondTrue1, CondFalse;

	2181 if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) {

	2182 // Handle folding opportunities.

	2183 Constant *_0 = Ctx->getConstantZero(IceType_i32);

	2184 _cmov(T, _1, CondTrue0, CondTrue1, _0, CondFalse);

	2185 _mov(Dest, T);

	2186 return;

	2187 }

	2188

2121 // t = Src0; t &= 1; Dest = t	2189 // t = Src0; t &= 1; Dest = t

2122 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);	2190 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);

2123 Constant *One = Ctx->getConstantInt32(1);

2124 Variable *T = makeReg(Dest->getType());

2125 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt	2191 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt

2126 // requires the source to be a register so could have required a _mov	2192 // requires the source to be a register so could have required a _mov

2127 // from legalize anyway.	2193 // from legalize anyway.

2128 _mov(T, Src0RF);	2194 _mov(T, Src0RF);

2129 _and(T, T, One);	2195 _and(T, T, _1);

2130 _mov(Dest, T);	2196 _mov(Dest, T);

2131 } else {	2197 } else {

2132 // t1 = uxt src; dst = t1	2198 // t1 = uxt src; dst = t1

2133 Variable *Src0R = legalizeToReg(Src0);	2199 Variable *Src0R = legalizeToReg(Src0);

2134 Variable *T = makeReg(Dest->getType());	2200 Variable *T = makeReg(Dest->getType());

2135 _uxt(T, Src0R);	2201 _uxt(T, Src0R);

2136 _mov(Dest, T);	2202 _mov(Dest, T);

2137 }	2203 }

2138 break;	2204 break;

2139 }	2205 }

(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2390 CondARM32::Cond CC1;	2456 CondARM32::Cond CC1;

2391 } TableFcmp[] = {	2457 } TableFcmp[] = {

2392 #define X(val, CC0, CC1) \	2458 #define X(val, CC0, CC1) \

2393 { CondARM32::CC0, CondARM32::CC1 } \	2459 { CondARM32::CC0, CondARM32::CC1 } \

2394 ,	2460 ,

2395 FCMPARM32_TABLE	2461 FCMPARM32_TABLE

2396 #undef X	2462 #undef X

2397 };	2463 };

2398 } // end of anonymous namespace	2464 } // end of anonymous namespace

2399	2465

2400 void TargetARM32::lowerFcmp(const InstFcmp *Inst) {	2466 void TargetARM32::lowerFcmpCond(const InstFcmp *Instr,

2401 Variable *Dest = Inst->getDest();	2467 CondARM32::Cond *CondIfTrue0,

	2468 CondARM32::Cond *CondIfTrue1,

	2469 CondARM32::Cond *CondIfFalse) {

	2470 InstFcmp::FCond Condition = Instr->getCondition();

	2471 switch (Condition) {

	2472 case InstFcmp::False:

	2473 *CondIfFalse = CondARM32::AL;

	2474 *CondIfTrue0 = *CondIfTrue1 = CondARM32::kNone;

	2475 break;

	2476 case InstFcmp::True:

	2477 *CondIfFalse = *CondIfTrue1 = CondARM32::kNone;

	2478 *CondIfTrue0 = CondARM32::AL;

	2479 break;

	2480 default: {

	2481 Variable *Src0R = legalizeToReg(Instr->getSrc(0));

	2482 Variable *Src1R = legalizeToReg(Instr->getSrc(1));

	2483 _vcmp(Src0R, Src1R);

	2484 _vmrs();

	2485 assert(Condition < llvm::array_lengthof(TableFcmp));

	2486 *CondIfTrue0 = TableFcmp[Condition].CC0;

	2487 *CondIfTrue1 = TableFcmp[Condition].CC1;

	2488 *CondIfFalse = (*CondIfTrue1 != CondARM32::kNone)

	2489 ? CondARM32::AL

	2490 : InstARM32::getOppositeCondition(*CondIfTrue0);

	2491 }

	2492 }

	2493 }

	2494

	2495 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {

	2496 Variable *Dest = Instr->getDest();

2402 if (isVectorType(Dest->getType())) {	2497 if (isVectorType(Dest->getType())) {

2403 Variable *T = makeReg(Dest->getType());	2498 Variable *T = makeReg(Dest->getType());

2404 Context.insert(InstFakeDef::create(Func, T));	2499 Context.insert(InstFakeDef::create(Func, T));

2405 _mov(Dest, T);	2500 _mov(Dest, T);

2406 UnimplementedError(Func->getContext()->getFlags());	2501 UnimplementedError(Func->getContext()->getFlags());

2407 return;	2502 return;

2408 }	2503 }

2409	2504

2410 Variable *Src0R = legalizeToReg(Inst->getSrc(0));

2411 Variable *Src1R = legalizeToReg(Inst->getSrc(1));

2412 Variable *T = makeReg(IceType_i32);	2505 Variable *T = makeReg(IceType_i32);

2413 _vcmp(Src0R, Src1R);	2506 Operand *_1 = Ctx->getConstantInt32(1);

2414 _mov(T, Ctx->getConstantZero(IceType_i32));	2507 Operand *_0 = Ctx->getConstantZero(IceType_i32);

2415 _vmrs();	2508

2416 Operand *One = Ctx->getConstantInt32(1);	2509 CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse;

2417 InstFcmp::FCond Condition = Inst->getCondition();	2510 lowerFcmpCond(Instr, &CondIfTrue0, &CondIfTrue1, &CondIfFalse);

2418 assert(Condition < llvm::array_lengthof(TableFcmp));	2511

2419 CondARM32::Cond CC0 = TableFcmp[Condition].CC0;	2512 bool RedefineT = false;

2420 CondARM32::Cond CC1 = TableFcmp[Condition].CC1;	2513 if (CondIfFalse != CondARM32::kNone) {

2421 if (CC0 != CondARM32::kNone) {	2514 assert(!RedefineT);

2422 _mov(T, One, CC0);	2515 _mov(T, _0, CondIfFalse);

2423 // If this mov is not a maybe mov, but an actual mov (i.e., CC0 == AL), we	2516 RedefineT = true;

2424 // don't want to _set_dest_redefined so that liveness + dead-code

2425 // elimination will get rid of the previous assignment (i.e., T = 0) above.

2426 // TODO(stichnot,jpp): We should be able to conditionally create the "T=0"

2427 // instruction based on CC0, instead of relying on DCE to remove it.

2428 if (CC0 != CondARM32::AL)

2429 _set_dest_redefined();

2430 }	2517 }

2431 if (CC1 != CondARM32::kNone) {	2518

2432 assert(CC0 != CondARM32::kNone);	2519 if (CondIfTrue0 != CondARM32::kNone) {

2433 assert(CC1 != CondARM32::AL);	2520 if (RedefineT) {

2434 _mov_redefined(T, One, CC1);	2521 _mov_redefined(T, _1, CondIfTrue0);

	2522 } else {

	2523 _mov(T, _1, CondIfTrue0);

	2524 }

	2525 RedefineT = true;

2435 }	2526 }

	2527

	2528 if (CondIfTrue1 != CondARM32::kNone) {

	2529 assert(RedefineT);

	2530 _mov_redefined(T, _1, CondIfTrue1);

	2531 }

	2532

2436 _mov(Dest, T);	2533 _mov(Dest, T);

2437 }	2534 }

2438	2535

2439 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {	2536 void TargetARM32::lowerIcmpCond(const InstIcmp *Inst,

2440 Variable *Dest = Inst->getDest();	2537 CondARM32::Cond *CondIfTrue,

	2538 CondARM32::Cond *CondIfFalse) {

2441 Operand *Src0 = legalizeUndef(Inst->getSrc(0));	2539 Operand *Src0 = legalizeUndef(Inst->getSrc(0));

2442 Operand *Src1 = legalizeUndef(Inst->getSrc(1));	2540 Operand *Src1 = legalizeUndef(Inst->getSrc(1));

2443	2541

2444 if (isVectorType(Dest->getType())) {

2445 Variable *T = makeReg(Dest->getType());

2446 Context.insert(InstFakeDef::create(Func, T));

2447 _mov(Dest, T);

2448 UnimplementedError(Func->getContext()->getFlags());

2449 return;

2450 }

2451

2452 // a=icmp cond, b, c ==>	2542 // a=icmp cond, b, c ==>

2453 // GCC does:	2543 // GCC does:

2454 // cmp b.hi, c.hi or cmp b.lo, c.lo	2544 // cmp b.hi, c.hi or cmp b.lo, c.lo

2455 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi	2545 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi

2456 // mov.<C1> t, #1 mov.<C1> t, #1	2546 // mov.<C1> t, #1 mov.<C1> t, #1

2457 // mov.<C2> t, #0 mov.<C2> t, #0	2547 // mov.<C2> t, #0 mov.<C2> t, #0

2458 // mov a, t mov a, t	2548 // mov a, t mov a, t

2459 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"	2549 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"

2460 // is used for signed compares. In some cases, b and c need to be swapped as	2550 // is used for signed compares. In some cases, b and c need to be swapped as

2461 // well.	2551 // well.

2462 //	2552 //

2463 // LLVM does:	2553 // LLVM does:

2464 // for EQ and NE:	2554 // for EQ and NE:

2465 // eor t1, b.hi, c.hi	2555 // eor t1, b.hi, c.hi

2466 // eor t2, b.lo, c.hi	2556 // eor t2, b.lo, c.hi

2467 // orrs t, t1, t2	2557 // orrs t, t1, t2

2468 // mov.<C> t, #1	2558 // mov.<C> t, #1

2469 // mov a, t	2559 // mov a, t

2470 //	2560 //

2471 // that's nice in that it's just as short but has fewer dependencies for	2561 // that's nice in that it's just as short but has fewer dependencies for

2472 // better ILP at the cost of more registers.	2562 // better ILP at the cost of more registers.

2473 //	2563 //

2474 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two	2564 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two

2475 // unconditional mov #0, two cmps, two conditional mov #1, and one	2565 // unconditional mov #0, two cmps, two conditional mov #1, and one

2476 // conditional reg mov. That has few dependencies for good ILP, but is a	2566 // conditional reg mov. That has few dependencies for good ILP, but is a

2477 // longer sequence.	2567 // longer sequence.

2478 //	2568 //

2479 // So, we are going with the GCC version since it's usually better (except	2569 // So, we are going with the GCC version since it's usually better (except

2480 // perhaps for eq/ne). We could revisit special-casing eq/ne later.	2570 // perhaps for eq/ne). We could revisit special-casing eq/ne later.

2481 Constant *Zero = Ctx->getConstantZero(IceType_i32);	2571

2482 Constant *One = Ctx->getConstantInt32(1);

2483 if (Src0->getType() == IceType_i64) {	2572 if (Src0->getType() == IceType_i64) {

2484 InstIcmp::ICond Conditon = Inst->getCondition();	2573 InstIcmp::ICond Conditon = Inst->getCondition();

2485 size_t Index = static_cast<size_t>(Conditon);	2574 size_t Index = static_cast<size_t>(Conditon);

2486 assert(Index < llvm::array_lengthof(TableIcmp64));	2575 assert(Index < llvm::array_lengthof(TableIcmp64));

2487 Variable *Src0Lo, *Src0Hi;	2576 Variable *Src0Lo, *Src0Hi;

2488 Operand *Src1LoRF, *Src1HiRF;	2577 Operand *Src1LoRF, *Src1HiRF;

2489 if (TableIcmp64[Index].Swapped) {	2578 if (TableIcmp64[Index].Swapped) {

2490 Src0Lo = legalizeToReg(loOperand(Src1));	2579 Src0Lo = legalizeToReg(loOperand(Src1));

2491 Src0Hi = legalizeToReg(hiOperand(Src1));	2580 Src0Hi = legalizeToReg(hiOperand(Src1));

2492 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);	2581 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);

2493 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);	2582 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);

2494 } else {	2583 } else {

2495 Src0Lo = legalizeToReg(loOperand(Src0));	2584 Src0Lo = legalizeToReg(loOperand(Src0));

2496 Src0Hi = legalizeToReg(hiOperand(Src0));	2585 Src0Hi = legalizeToReg(hiOperand(Src0));

2497 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);	2586 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);

2498 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);	2587 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);

2499 }	2588 }

2500 Variable *T = makeReg(IceType_i32);

2501 if (TableIcmp64[Index].IsSigned) {	2589 if (TableIcmp64[Index].IsSigned) {

2502 Variable *ScratchReg = makeReg(IceType_i32);	2590 Variable *ScratchReg = makeReg(IceType_i32);

2503 _cmp(Src0Lo, Src1LoRF);	2591 _cmp(Src0Lo, Src1LoRF);

2504 _sbcs(ScratchReg, Src0Hi, Src1HiRF);	2592 _sbcs(ScratchReg, Src0Hi, Src1HiRF);

2505 // ScratchReg isn't going to be used, but we need the side-effect of	2593 // ScratchReg isn't going to be used, but we need the side-effect of

2506 // setting flags from this operation.	2594 // setting flags from this operation.

2507 Context.insert(InstFakeUse::create(Func, ScratchReg));	2595 Context.insert(InstFakeUse::create(Func, ScratchReg));

2508 } else {	2596 } else {

2509 _cmp(Src0Hi, Src1HiRF);	2597 _cmp(Src0Hi, Src1HiRF);

2510 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);	2598 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);

2511 }	2599 }

2512 _mov(T, One, TableIcmp64[Index].C1);	2600 *CondIfTrue = TableIcmp64[Index].C1;

2513 _mov_redefined(T, Zero, TableIcmp64[Index].C2);	2601 *CondIfFalse = TableIcmp64[Index].C2;

2514 _mov(Dest, T);

2515 return;	2602 return;

2516 }	2603 }

2517	2604

2518 // a=icmp cond b, c ==>	2605 // a=icmp cond b, c ==>

2519 // GCC does:	2606 // GCC does:

2520 // <u/s>xtb tb, b	2607 // <u/s>xtb tb, b

2521 // <u/s>xtb tc, c	2608 // <u/s>xtb tc, c

2522 // cmp tb, tc	2609 // cmp tb, tc

2523 // mov.C1 t, #0	2610 // mov.C1 t, #0

2524 // mov.C2 t, #1	2611 // mov.C2 t, #1

(...skipping 16 matching lines...) Expand all Loading...
2541 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For	2628 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For

2542 // the unsigned case, for some reason it does similar to GCC and does a uxtb	2629 // the unsigned case, for some reason it does similar to GCC and does a uxtb

2543 // first. It's not clear to me why that special-casing is needed.	2630 // first. It's not clear to me why that special-casing is needed.

2544 //	2631 //

2545 // We'll go with the LLVM way for now, since it's shorter and has just as few	2632 // We'll go with the LLVM way for now, since it's shorter and has just as few

2546 // dependencies.	2633 // dependencies.

2547 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());	2634 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());

2548 assert(ShiftAmt >= 0);	2635 assert(ShiftAmt >= 0);

2549 Constant *ShiftConst = nullptr;	2636 Constant *ShiftConst = nullptr;

2550 Variable *Src0R = nullptr;	2637 Variable *Src0R = nullptr;

2551 Variable *T = makeReg(IceType_i32);

2552 if (ShiftAmt) {	2638 if (ShiftAmt) {

2553 ShiftConst = Ctx->getConstantInt32(ShiftAmt);	2639 ShiftConst = Ctx->getConstantInt32(ShiftAmt);

2554 Src0R = makeReg(IceType_i32);	2640 Src0R = makeReg(IceType_i32);

2555 _lsl(Src0R, legalizeToReg(Src0), ShiftConst);	2641 _lsl(Src0R, legalizeToReg(Src0), ShiftConst);

2556 } else {	2642 } else {

2557 Src0R = legalizeToReg(Src0);	2643 Src0R = legalizeToReg(Src0);

2558 }	2644 }

2559 _mov(T, Zero);

2560 if (ShiftAmt) {	2645 if (ShiftAmt) {

2561 Variable *Src1R = legalizeToReg(Src1);	2646 Variable *Src1R = legalizeToReg(Src1);

2562 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(	2647 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(

2563 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);	2648 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);

2564 _cmp(Src0R, Src1RShifted);	2649 _cmp(Src0R, Src1RShifted);

2565 } else {	2650 } else {

2566 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);	2651 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);

2567 _cmp(Src0R, Src1RF);	2652 _cmp(Src0R, Src1RF);

2568 }	2653 }

2569 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition()));	2654 *CondIfTrue = getIcmp32Mapping(Inst->getCondition());

	2655 *CondIfFalse = InstARM32::getOppositeCondition(*CondIfTrue);

	2656 }

	2657

	2658 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {

	2659 Variable *Dest = Inst->getDest();

	2660

	2661 if (isVectorType(Dest->getType())) {

	2662 Variable *T = makeReg(Dest->getType());

	2663 Context.insert(InstFakeDef::create(Func, T));

	2664 _mov(Dest, T);

	2665 UnimplementedError(Func->getContext()->getFlags());

	2666 return;

	2667 }

	2668

	2669 Constant *_0 = Ctx->getConstantZero(IceType_i32);

	2670 Constant *_1 = Ctx->getConstantInt32(1);

	2671 Variable *T = makeReg(IceType_i32);

	2672

	2673 CondARM32::Cond CondIfTrue, CondIfFalse;

	2674 lowerIcmpCond(Inst, &CondIfTrue, &CondIfFalse);

	2675

	2676 _mov(T, _0, CondIfFalse);

	2677 _mov_redefined(T, _1, CondIfTrue);

2570 _mov(Dest, T);	2678 _mov(Dest, T);

	2679

2571 return;	2680 return;

2572 }	2681 }

2573	2682

2574 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {	2683 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {

2575 (void)Inst;	2684 (void)Inst;

2576 UnimplementedError(Func->getContext()->getFlags());	2685 UnimplementedError(Func->getContext()->getFlags());

2577 }	2686 }

2578	2687

2579 namespace {	2688 namespace {

2580 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {	2689 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {

(...skipping 741 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3322 Operand *SrcF = Inst->getFalseOperand();	3431 Operand *SrcF = Inst->getFalseOperand();

3323 Operand *Condition = Inst->getCondition();	3432 Operand *Condition = Inst->getCondition();

3324	3433

3325 if (isVectorType(DestTy)) {	3434 if (isVectorType(DestTy)) {

3326 Variable *T = makeReg(DestTy);	3435 Variable *T = makeReg(DestTy);

3327 Context.insert(InstFakeDef::create(Func, T));	3436 Context.insert(InstFakeDef::create(Func, T));

3328 _mov(Dest, T);	3437 _mov(Dest, T);

3329 UnimplementedError(Func->getContext()->getFlags());	3438 UnimplementedError(Func->getContext()->getFlags());

3330 return;	3439 return;

3331 }	3440 }

3332 // TODO(jvoung): handle folding opportunities.	3441

3333 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t	3442 CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse;

3334 Variable *CmpOpnd0 = legalizeToReg(Condition);	3443 if (!_mov_i1_to_flags(Condition, &CondIfTrue0, &CondIfTrue1, &CondIfFalse)) {

3335 Type CmpOpnd0Ty = CmpOpnd0->getType();	3444 // "Condition" was not fold.

3336 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);	3445 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t

3337 assert(CmpOpnd0Ty == IceType_i1);	3446 Variable *CmpOpnd0 = legalizeToReg(Condition);

3338 if (CmpOpnd0Ty != IceType_i32)	3447 Type CmpOpnd0Ty = CmpOpnd0->getType();

3339 _uxt(CmpOpnd0, CmpOpnd0);	3448 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);

3340 _cmp(CmpOpnd0, CmpOpnd1);	3449 assert(CmpOpnd0Ty == IceType_i1);

3341 static constexpr CondARM32::Cond Cond = CondARM32::NE;	3450 if (CmpOpnd0Ty != IceType_i32)

	3451 _uxt(CmpOpnd0, CmpOpnd0);

	3452 _cmp(CmpOpnd0, CmpOpnd1);

	3453 CondIfTrue0 = CondARM32::NE;

	3454 CondIfTrue1 = CondARM32::kNone;

	3455 CondIfFalse = CondARM32::EQ;

	3456 }

	3457

3342 if (DestTy == IceType_i64) {	3458 if (DestTy == IceType_i64) {

3343 SrcT = legalizeUndef(SrcT);	3459 SrcT = legalizeUndef(SrcT);

3344 SrcF = legalizeUndef(SrcF);	3460 SrcF = legalizeUndef(SrcF);

3345 // Set the low portion.	3461 // Set the low portion.

3346 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	3462 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

	3463 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);

3347 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);	3464 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);

3348 Variable *TLo = makeReg(SrcFLo->getType());	3465 Variable *TLo = makeReg(SrcFLo->getType());

3349 _mov(TLo, SrcFLo);	3466 bool RedefineTLo = false;

3350 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);	3467 if (CondIfFalse != CondARM32::kNone) {

3351 _mov_redefined(TLo, SrcTLo, Cond);	3468 _mov(TLo, SrcFLo, CondIfFalse);

	3469 RedefineTLo = true;

	3470 }

	3471 if (CondIfTrue0 != CondARM32::kNone) {

	3472 if (!RedefineTLo)

	3473 _mov(TLo, SrcTLo, CondIfTrue0);

	3474 else

	3475 _mov_redefined(TLo, SrcTLo, CondIfTrue0);

	3476 RedefineTLo = true;

	3477 }

	3478 if (CondIfTrue1 != CondARM32::kNone) {

	3479 assert(RedefineTLo);

	3480 _mov_redefined(TLo, SrcTLo, CondIfTrue1);

	3481 }

3352 _mov(DestLo, TLo);	3482 _mov(DestLo, TLo);

	3483

3353 // Set the high portion.	3484 // Set the high portion.

3354 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	3485 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

	3486 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);

3355 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);	3487 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);

3356 Variable *THi = makeReg(SrcFHi->getType());	3488 Variable *THi = makeReg(SrcFHi->getType());

3357 _mov(THi, SrcFHi);	3489 bool RedefineTHi = false;

3358 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);	3490 if (CondIfFalse != CondARM32::kNone) {

3359 _mov_redefined(THi, SrcTHi, Cond);	3491 _mov(THi, SrcFHi, CondIfFalse);

	3492 RedefineTHi = true;

	3493 }

	3494 if (CondIfTrue0 != CondARM32::kNone) {

	3495 if (!RedefineTHi)

	3496 _mov(THi, SrcTHi, CondIfTrue0);

	3497 else

	3498 _mov_redefined(THi, SrcTHi, CondIfTrue0);

	3499 RedefineTHi = true;

	3500 }

	3501 if (CondIfTrue1 != CondARM32::kNone) {

	3502 assert(RedefineTHi);

	3503 _mov_redefined(THi, SrcTHi, CondIfTrue1);

	3504 }

3360 _mov(DestHi, THi);	3505 _mov(DestHi, THi);

3361 return;	3506 return;

3362 }	3507 }

3363	3508

3364 if (isFloatingType(DestTy)) {	3509 if (isFloatingType(DestTy)) {

	3510 SrcT = legalizeToReg(SrcT);

	3511 SrcF = legalizeToReg(SrcF);

3365 Variable *T = makeReg(DestTy);	3512 Variable *T = makeReg(DestTy);

3366 SrcF = legalizeToReg(SrcF);

3367 assert(DestTy == SrcF->getType());	3513 assert(DestTy == SrcF->getType());

3368 _mov(T, SrcF);	3514 bool RedefineT = false;

3369 SrcT = legalizeToReg(SrcT);	3515 if (CondIfFalse != CondARM32::kNone) {

	3516 _mov(T, SrcF, CondIfFalse);

	3517 RedefineT = true;

	3518 }

	3519 if (CondIfTrue0 != CondARM32::kNone) {

	3520 if (!RedefineT)

	3521 _mov(T, SrcT, CondIfTrue0);

	3522 else

	3523 _mov_redefined(T, SrcT, CondIfTrue0);

	3524 RedefineT = true;

	3525 }

	3526 if (CondIfTrue1 != CondARM32::kNone) {

	3527 assert(RedefineT);

	3528 _mov_redefined(T, SrcT, CondIfTrue1);

	3529 }

3370 assert(DestTy == SrcT->getType());	3530 assert(DestTy == SrcT->getType());

3371 _mov(T, SrcT, Cond);

3372 _set_dest_redefined();

3373 _mov(Dest, T);	3531 _mov(Dest, T);

3374 return;	3532 return;

3375 }	3533 }

3376	3534

	3535 Variable *T = makeReg(SrcF->getType());

	3536 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);

3377 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);	3537 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);

3378 Variable *T = makeReg(SrcF->getType());	3538 bool RedefineT = false;

3379 _mov(T, SrcF);	3539 if (CondIfFalse != CondARM32::kNone) {

3380 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);	3540 _mov(T, SrcF, CondIfFalse);

3381 _mov_redefined(T, SrcT, Cond);	3541 RedefineT = true;

	3542 }

	3543 if (CondIfTrue0 != CondARM32::kNone) {

	3544 if (!RedefineT)

	3545 _mov(T, SrcT, CondIfTrue0);

	3546 else

	3547 _mov_redefined(T, SrcT, CondIfTrue0);

	3548 RedefineT = true;

	3549 }

	3550 if (CondIfTrue1 != CondARM32::kNone) {

	3551 assert(RedefineT);

	3552 _mov_redefined(T, SrcT, CondIfTrue1);

	3553 }

3382 _mov(Dest, T);	3554 _mov(Dest, T);

3383 }	3555 }

3384	3556

3385 void TargetARM32::lowerStore(const InstStore *Inst) {	3557 void TargetARM32::lowerStore(const InstStore *Inst) {

3386 Operand *Value = Inst->getData();	3558 Operand *Value = Inst->getData();

3387 Operand *Addr = Inst->getAddr();	3559 Operand *Addr = Inst->getAddr();

3388 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());	3560 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());

3389 Type Ty = NewAddr->getType();	3561 Type Ty = NewAddr->getType();

3390	3562

3391 if (Ty == IceType_i64) {	3563 if (Ty == IceType_i64) {

(...skipping 387 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3779	3951

3780 void TargetARM32::emit(const ConstantDouble *C) const {	3952 void TargetARM32::emit(const ConstantDouble *C) const {

3781 (void)C;	3953 (void)C;

3782 UnimplementedError(Ctx->getFlags());	3954 UnimplementedError(Ctx->getFlags());

3783 }	3955 }

3784	3956

3785 void TargetARM32::emit(const ConstantUndef *) const {	3957 void TargetARM32::emit(const ConstantUndef *) const {

3786 llvm::report_fatal_error("undef value encountered by emitter.");	3958 llvm::report_fatal_error("undef value encountered by emitter.");

3787 }	3959 }

3788	3960

	3961 void TargetARM32::lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue,

	3962 CondARM32::Cond *CondIfFalse) {

	3963 Operand *_1 = Ctx->getConstantInt32(1);

	3964 Variable *SrcR =

	3965 legalizeToReg(Src->getType() == IceType_i64 ? loOperand(Src) : Src);

	3966 _tst(SrcR, _1);

	3967 *CondIfTrue = CondARM32::NE; // NE <-> APSR.Z == 0

	3968 *CondIfFalse = CondARM32::EQ; // EQ <-> APSR.Z == 1

	3969 }

	3970

	3971 bool TargetARM32::_mov_i1_to_flags(Operand *Boolean,

	3972 CondARM32::Cond *CondIfTrue0,

	3973 CondARM32::Cond *CondIfTrue1,

	3974 CondARM32::Cond *CondIfFalse) {

	3975 *CondIfTrue0 = CondARM32::kNone;

	3976 *CondIfTrue1 = CondARM32::kNone;

	3977 *CondIfFalse = CondARM32::AL;

	3978 bool FoldOK = false;

	3979 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) {

	3980 if (const auto *IcmpProducer = llvm::dyn_cast<InstIcmp>(Producer)) {

	3981 lowerIcmpCond(IcmpProducer, CondIfTrue0, CondIfFalse);

	3982 FoldOK = true;

	3983 } else if (const auto *FcmpProducer = llvm::dyn_cast<InstFcmp>(Producer)) {

	3984 lowerFcmpCond(FcmpProducer, CondIfTrue0, CondIfTrue1, CondIfFalse);

	3985 FoldOK = true;

	3986 } else if (const auto *CastProducer = llvm::dyn_cast<InstCast>(Producer)) {

	3987 assert(CastProducer->getCastKind() == InstCast::Trunc);

	3988 lowerTruncToFlags(CastProducer->getSrc(0), CondIfTrue0, CondIfFalse);

	3989 FoldOK = true;

	3990 }

	3991 }

	3992 return FoldOK;

	3993 }

	3994

	3995 namespace {

	3996 namespace BoolFolding {

	3997 bool shouldTrackProducer(const Inst &Instr) {

	3998 switch (static_cast<uint32_t>(Instr.getKind())) {

	3999 case Inst::Icmp:

	4000 return true;

	4001 case Inst::Fcmp:

	4002 return true;

	4003 }

	4004 if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) {

	4005 switch (static_cast<uint32_t>(Cast->getCastKind())) {

	4006 case InstCast::Trunc:

	4007 return true;

	4008 }

	4009 }

	4010 return false;

	4011 }

	4012

	4013 bool isValidConsumer(const Inst &Instr) {

	4014 switch (static_cast<uint32_t>(Instr.getKind())) {

	4015 case Inst::Br:

	4016 return true;

	4017 case Inst::Select:

	4018 return !isVectorType(Instr.getDest()->getType());

	4019 }

	4020 if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) {

	4021 switch (static_cast<uint32_t>(Cast->getCastKind())) {

	4022 case InstCast::Sext:

	4023 return !isVectorType(Instr.getDest()->getType());

	4024 case InstCast::Zext:

	4025 return !isVectorType(Instr.getDest()->getType());

	4026 }

	4027 }

	4028 return false;

	4029 }

	4030 } // end of namespace BoolFolding

	4031 } // end of anonymous namespace

	4032

	4033 void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) {

	4034 for (Inst &Instr : Node->getInsts()) {

	4035 // Check whether Instr is a valid producer.

	4036 Variable *Dest = Instr.getDest();

	4037 if (!Instr.isDeleted() // only consider non-deleted instructions; and

	4038 && Dest // only instructions with an actual dest var; and

	4039 && Dest->getType() == IceType_i1 // only bool-type dest vars; and

	4040 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.

	4041 KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr));

	4042 }

	4043 // Check each src variable against the map.

	4044 FOREACH_VAR_IN_INST(Var, Instr) {

	4045 SizeT VarNum = Var->getIndex();

	4046 auto ComputationIter = KnownComputations.find(VarNum);

	4047 if (ComputationIter == KnownComputations.end()) {

	4048 continue;

	4049 }

	4050

	4051 if (IndexOfVarOperandInInst(Var) != 0 ||

	4052 !BoolFolding::isValidConsumer(Instr)) {

	4053 // All valid consumers use Var as the first source operand

	4054 KnownComputations.erase(VarNum);

	4055 continue;

	4056 }

	4057

	4058 if (Instr.isLastUse(Var)) {

	4059 ComputationIter->second.IsLiveOut = false;

	4060 }

	4061 }

	4062 }

	4063

	4064 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();

	4065 Iter != End;) {

	4066 // Disable the folding if its dest may be live beyond this block.

	4067 if (Iter->second.IsLiveOut) {

	4068 Iter = KnownComputations.erase(Iter);

	4069 continue;

	4070 }

	4071

	4072 // Mark as "dead" rather than outright deleting. This is so that other

	4073 // peephole style optimizations during or before lowering have access to

	4074 // this instruction in undeleted form. See for example

	4075 // tryOptimizedCmpxchgCmpBr().

	4076 Iter->second.Instr->setDead();

	4077 ++Iter;

	4078 }

	4079 }

	4080

3789 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)	4081 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)

3790 : TargetDataLowering(Ctx) {}	4082 : TargetDataLowering(Ctx) {}

3791	4083

3792 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,	4084 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,

3793 const IceString &SectionSuffix) {	4085 const IceString &SectionSuffix) {

3794 switch (Ctx->getFlags().getOutFileType()) {	4086 switch (Ctx->getFlags().getOutFileType()) {

3795 case FT_Elf: {	4087 case FT_Elf: {

3796 ELFObjectWriter *Writer = Ctx->getObjectWriter();	4088 ELFObjectWriter *Writer = Ctx->getObjectWriter();

3797 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);	4089 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);

3798 } break;	4090 } break;

(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3970 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";	4262 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";

3971 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {	4263 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {

3972 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";	4264 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";

3973 }	4265 }

3974 // Technically R9 is used for TLS with Sandboxing, and we reserve it.	4266 // Technically R9 is used for TLS with Sandboxing, and we reserve it.

3975 // However, for compatibility with current NaCl LLVM, don't claim that.	4267 // However, for compatibility with current NaCl LLVM, don't claim that.

3976 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";	4268 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";

3977 }	4269 }

3978	4270

3979 } // end of namespace Ice	4271 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/assembler/arm32/branch-mult-fwd.ll » ('j') | no next file with comments »