Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(789)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1222943003: ARM32: Lower more integer intrinsics and test. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: move to TargetLowering Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringX86Base.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1557 matching lines...) Expand 10 before | Expand all | Expand 10 after
1568 // eliminated after lowering, we would need to ensure that the 1568 // eliminated after lowering, we would need to ensure that the
1569 // pre-call and the post-call esp adjustment get eliminated as well. 1569 // pre-call and the post-call esp adjustment get eliminated as well.
1570 if (ParameterAreaSizeBytes) { 1570 if (ParameterAreaSizeBytes) {
1571 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), 1571 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1572 Legal_Reg | Legal_Flex); 1572 Legal_Reg | Legal_Flex);
1573 _adjust_stack(ParameterAreaSizeBytes, SubAmount); 1573 _adjust_stack(ParameterAreaSizeBytes, SubAmount);
1574 } 1574 }
1575 1575
1576 // Copy arguments that are passed on the stack to the appropriate 1576 // Copy arguments that are passed on the stack to the appropriate
1577 // stack locations. 1577 // stack locations.
1578 Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp); 1578 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1579 for (auto &StackArg : StackArgs) { 1579 for (auto &StackArg : StackArgs) {
1580 ConstantInteger32 *Loc = 1580 ConstantInteger32 *Loc =
1581 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); 1581 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
1582 Type Ty = StackArg.first->getType(); 1582 Type Ty = StackArg.first->getType();
1583 OperandARM32Mem *Addr; 1583 OperandARM32Mem *Addr;
1584 constexpr bool SignExt = false; 1584 constexpr bool SignExt = false;
1585 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) { 1585 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
1586 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); 1586 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
1587 } else { 1587 } else {
1588 Variable *NewBase = Func->makeVariable(SP->getType()); 1588 Variable *NewBase = Func->makeVariable(SP->getType());
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
1655 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); 1655 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
1656 Context.insert(NewCall); 1656 Context.insert(NewCall);
1657 if (ReturnRegHi) 1657 if (ReturnRegHi)
1658 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 1658 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1659 1659
1660 // Add the appropriate offset to SP. The call instruction takes care 1660 // Add the appropriate offset to SP. The call instruction takes care
1661 // of resetting the stack offset during emission. 1661 // of resetting the stack offset during emission.
1662 if (ParameterAreaSizeBytes) { 1662 if (ParameterAreaSizeBytes) {
1663 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), 1663 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1664 Legal_Reg | Legal_Flex); 1664 Legal_Reg | Legal_Flex);
1665 Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp); 1665 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1666 _add(SP, SP, AddAmount); 1666 _add(SP, SP, AddAmount);
1667 } 1667 }
1668 1668
1669 // Insert a register-kill pseudo instruction. 1669 // Insert a register-kill pseudo instruction.
1670 Context.insert(InstFakeKill::create(Func, NewCall)); 1670 Context.insert(InstFakeKill::create(Func, NewCall));
1671 1671
1672 // Generate a FakeUse to keep the call live if necessary. 1672 // Generate a FakeUse to keep the call live if necessary.
1673 if (Instr->hasSideEffects() && ReturnReg) { 1673 if (Instr->hasSideEffects() && ReturnReg) {
1674 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); 1674 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
1675 Context.insert(FakeUse); 1675 Context.insert(FakeUse);
(...skipping 349 matching lines...) Expand 10 before | Expand all | Expand 10 after
2025 return; 2025 return;
2026 } 2026 }
2027 case Intrinsics::AtomicRMW: 2027 case Intrinsics::AtomicRMW:
2028 UnimplementedError(Func->getContext()->getFlags()); 2028 UnimplementedError(Func->getContext()->getFlags());
2029 return; 2029 return;
2030 case Intrinsics::AtomicStore: { 2030 case Intrinsics::AtomicStore: {
2031 UnimplementedError(Func->getContext()->getFlags()); 2031 UnimplementedError(Func->getContext()->getFlags());
2032 return; 2032 return;
2033 } 2033 }
2034 case Intrinsics::Bswap: { 2034 case Intrinsics::Bswap: {
2035 UnimplementedError(Func->getContext()->getFlags()); 2035 Variable *Dest = Instr->getDest();
2036 Operand *Val = Instr->getArg(0);
2037 Type Ty = Val->getType();
2038 if (Ty == IceType_i64) {
2039 Variable *Val_Lo = legalizeToVar(loOperand(Val));
2040 Variable *Val_Hi = legalizeToVar(hiOperand(Val));
2041 Variable *T_Lo = makeReg(IceType_i32);
2042 Variable *T_Hi = makeReg(IceType_i32);
2043 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2044 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2045 _rev(T_Lo, Val_Lo);
2046 _rev(T_Hi, Val_Hi);
2047 _mov(DestLo, T_Hi);
2048 _mov(DestHi, T_Lo);
2049 } else {
2050 assert(Ty == IceType_i32 || Ty == IceType_i16);
2051 Variable *ValR = legalizeToVar(Val);
2052 Variable *T = makeReg(Ty);
2053 _rev(T, ValR);
2054 if (Val->getType() == IceType_i16) {
2055 Operand *Sixteen =
2056 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
2057 _lsr(T, T, Sixteen);
2058 }
2059 _mov(Dest, T);
2060 }
2036 return; 2061 return;
2037 } 2062 }
2038 case Intrinsics::Ctpop: { 2063 case Intrinsics::Ctpop: {
2039 UnimplementedError(Func->getContext()->getFlags()); 2064 Variable *Dest = Instr->getDest();
2065 Operand *Val = Instr->getArg(0);
2066 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
2067 ? H_call_ctpop_i32
2068 : H_call_ctpop_i64,
2069 Dest, 1);
2070 Call->addArg(Val);
2071 lowerCall(Call);
2072 // The popcount helpers always return 32-bit values, while the intrinsic's
2073 // signature matches some 64-bit platform's native instructions and
2074 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest
2075 // just in case the user doesn't do that in the IR or doesn't toss the bits
2076 // via truncate.
2077 if (Val->getType() == IceType_i64) {
2078 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2079 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2080 _mov(DestHi, Zero);
2081 }
2040 return; 2082 return;
2041 } 2083 }
2042 case Intrinsics::Ctlz: { 2084 case Intrinsics::Ctlz: {
2043 UnimplementedError(Func->getContext()->getFlags()); 2085 // The "is zero undef" parameter is ignored and we always return
2086 // a well-defined value.
2087 Operand *Val = Instr->getArg(0);
2088 Variable *ValLoR;
2089 Variable *ValHiR = nullptr;
2090 if (Val->getType() == IceType_i64) {
2091 ValLoR = legalizeToVar(loOperand(Val));
2092 ValHiR = legalizeToVar(hiOperand(Val));
2093 } else {
2094 ValLoR = legalizeToVar(Val);
2095 }
2096 lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
2044 return; 2097 return;
2045 } 2098 }
2046 case Intrinsics::Cttz: { 2099 case Intrinsics::Cttz: {
2047 UnimplementedError(Func->getContext()->getFlags()); 2100 // Essentially like Clz, but reverse the bits first.
2101 Operand *Val = Instr->getArg(0);
2102 Variable *ValLoR;
2103 Variable *ValHiR = nullptr;
2104 if (Val->getType() == IceType_i64) {
2105 ValLoR = legalizeToVar(loOperand(Val));
2106 ValHiR = legalizeToVar(hiOperand(Val));
2107 Variable *TLo = makeReg(IceType_i32);
2108 Variable *THi = makeReg(IceType_i32);
2109 _rbit(TLo, ValLoR);
2110 _rbit(THi, ValHiR);
2111 ValLoR = THi;
2112 ValHiR = TLo;
2113 } else {
2114 ValLoR = legalizeToVar(Val);
2115 Variable *T = makeReg(IceType_i32);
2116 _rbit(T, ValLoR);
2117 ValLoR = T;
2118 }
2119 lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
2048 return; 2120 return;
2049 } 2121 }
2050 case Intrinsics::Fabs: { 2122 case Intrinsics::Fabs: {
2051 UnimplementedError(Func->getContext()->getFlags()); 2123 UnimplementedError(Func->getContext()->getFlags());
2052 return; 2124 return;
2053 } 2125 }
2054 case Intrinsics::Longjmp: { 2126 case Intrinsics::Longjmp: {
2055 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); 2127 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
2056 Call->addArg(Instr->getArg(0)); 2128 Call->addArg(Instr->getArg(0));
2057 Call->addArg(Instr->getArg(1)); 2129 Call->addArg(Instr->getArg(1));
(...skipping 12 matching lines...) Expand all
2070 } 2142 }
2071 case Intrinsics::Memmove: { 2143 case Intrinsics::Memmove: {
2072 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); 2144 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
2073 Call->addArg(Instr->getArg(0)); 2145 Call->addArg(Instr->getArg(0));
2074 Call->addArg(Instr->getArg(1)); 2146 Call->addArg(Instr->getArg(1));
2075 Call->addArg(Instr->getArg(2)); 2147 Call->addArg(Instr->getArg(2));
2076 lowerCall(Call); 2148 lowerCall(Call);
2077 return; 2149 return;
2078 } 2150 }
2079 case Intrinsics::Memset: { 2151 case Intrinsics::Memset: {
2080 // The value operand needs to be extended to a stack slot size 2152 // The value operand needs to be extended to a stack slot size because the
2081 // because the PNaCl ABI requires arguments to be at least 32 bits 2153 // PNaCl ABI requires arguments to be at least 32 bits wide.
2082 // wide.
2083 Operand *ValOp = Instr->getArg(1); 2154 Operand *ValOp = Instr->getArg(1);
2084 assert(ValOp->getType() == IceType_i8); 2155 assert(ValOp->getType() == IceType_i8);
2085 Variable *ValExt = Func->makeVariable(stackSlotType()); 2156 Variable *ValExt = Func->makeVariable(stackSlotType());
2086 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); 2157 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
2158 // Technically, ARM has their own __aeabi_memset, but we can use plain
2159 // memset too. The value and size argument need to be flipped if we ever
2160 // decide to use __aeabi_memset.
2087 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); 2161 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
2088 Call->addArg(Instr->getArg(0)); 2162 Call->addArg(Instr->getArg(0));
2089 Call->addArg(ValExt); 2163 Call->addArg(ValExt);
2090 Call->addArg(Instr->getArg(2)); 2164 Call->addArg(Instr->getArg(2));
2091 lowerCall(Call); 2165 lowerCall(Call);
2092 return; 2166 return;
2093 } 2167 }
2094 case Intrinsics::NaClReadTP: { 2168 case Intrinsics::NaClReadTP: {
2095 if (Ctx->getFlags().getUseSandboxing()) { 2169 if (Ctx->getFlags().getUseSandboxing()) {
2096 UnimplementedError(Func->getContext()->getFlags()); 2170 UnimplementedError(Func->getContext()->getFlags());
2097 } else { 2171 } else {
2098 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); 2172 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
2099 lowerCall(Call); 2173 lowerCall(Call);
2100 } 2174 }
2101 return; 2175 return;
2102 } 2176 }
2103 case Intrinsics::Setjmp: { 2177 case Intrinsics::Setjmp: {
2104 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); 2178 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
2105 Call->addArg(Instr->getArg(0)); 2179 Call->addArg(Instr->getArg(0));
2106 lowerCall(Call); 2180 lowerCall(Call);
2107 return; 2181 return;
2108 } 2182 }
2109 case Intrinsics::Sqrt: { 2183 case Intrinsics::Sqrt: {
2110 UnimplementedError(Func->getContext()->getFlags()); 2184 UnimplementedError(Func->getContext()->getFlags());
2111 return; 2185 return;
2112 } 2186 }
2113 case Intrinsics::Stacksave: { 2187 case Intrinsics::Stacksave: {
2114 UnimplementedError(Func->getContext()->getFlags()); 2188 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2189 Variable *Dest = Instr->getDest();
2190 _mov(Dest, SP);
2115 return; 2191 return;
2116 } 2192 }
2117 case Intrinsics::Stackrestore: { 2193 case Intrinsics::Stackrestore: {
2118 UnimplementedError(Func->getContext()->getFlags()); 2194 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2195 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
2196 _mov_nonkillable(SP, Val);
2119 return; 2197 return;
2120 } 2198 }
2121 case Intrinsics::Trap: 2199 case Intrinsics::Trap:
2122 UnimplementedError(Func->getContext()->getFlags()); 2200 _trap();
2123 return; 2201 return;
2124 case Intrinsics::UnknownIntrinsic: 2202 case Intrinsics::UnknownIntrinsic:
2125 Func->setError("Should not be lowering UnknownIntrinsic"); 2203 Func->setError("Should not be lowering UnknownIntrinsic");
2126 return; 2204 return;
2127 } 2205 }
2128 return; 2206 return;
2129 } 2207 }
2130 2208
2209 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
2210 Type Ty = Dest->getType();
2211 assert(Ty == IceType_i32 || Ty == IceType_i64);
2212 Variable *T = makeReg(IceType_i32);
2213 _clz(T, ValLoR);
2214 if (Ty == IceType_i64) {
2215 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2216 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2217 Operand *Zero =
2218 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2219 Operand *ThirtyTwo =
2220 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2221 _cmp(ValHiR, Zero);
2222 Variable *T2 = makeReg(IceType_i32);
2223 _add(T2, T, ThirtyTwo);
2224 _clz(T2, ValHiR, CondARM32::NE);
2225 // T2 is actually a source as well when the predicate is not AL
2226 // (since it may leave T2 alone). We use set_dest_nonkillable to
2227 // prolong the liveness of T2 as if it was used as a source.
2228 _set_dest_nonkillable();
2229 _mov(DestLo, T2);
2230 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
2231 return;
2232 }
2233 _mov(Dest, T);
2234 return;
2235 }
2236
2131 void TargetARM32::lowerLoad(const InstLoad *Load) { 2237 void TargetARM32::lowerLoad(const InstLoad *Load) {
2132 // A Load instruction can be treated the same as an Assign 2238 // A Load instruction can be treated the same as an Assign
2133 // instruction, after the source operand is transformed into an 2239 // instruction, after the source operand is transformed into an
2134 // OperandARM32Mem operand. 2240 // OperandARM32Mem operand.
2135 Type Ty = Load->getDest()->getType(); 2241 Type Ty = Load->getDest()->getType();
2136 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 2242 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
2137 Variable *DestLoad = Load->getDest(); 2243 Variable *DestLoad = Load->getDest();
2138 2244
2139 // TODO(jvoung): handled folding opportunities. Sign and zero extension 2245 // TODO(jvoung): handled folding opportunities. Sign and zero extension
2140 // can be folded into a load. 2246 // can be folded into a load.
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
2179 // addEpilog explicitly looks for a ret instruction as a marker for 2285 // addEpilog explicitly looks for a ret instruction as a marker for
2180 // where to insert the frame removal instructions. 2286 // where to insert the frame removal instructions.
2181 // addEpilog is responsible for restoring the "lr" register as needed 2287 // addEpilog is responsible for restoring the "lr" register as needed
2182 // prior to this ret instruction. 2288 // prior to this ret instruction.
2183 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); 2289 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
2184 // Add a fake use of sp to make sure sp stays alive for the entire 2290 // Add a fake use of sp to make sure sp stays alive for the entire
2185 // function. Otherwise post-call sp adjustments get dead-code 2291 // function. Otherwise post-call sp adjustments get dead-code
2186 // eliminated. TODO: Are there more places where the fake use 2292 // eliminated. TODO: Are there more places where the fake use
2187 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not 2293 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
2188 // have a ret instruction. 2294 // have a ret instruction.
2189 Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp); 2295 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2190 Context.insert(InstFakeUse::create(Func, SP)); 2296 Context.insert(InstFakeUse::create(Func, SP));
2191 } 2297 }
2192 2298
2193 void TargetARM32::lowerSelect(const InstSelect *Inst) { 2299 void TargetARM32::lowerSelect(const InstSelect *Inst) {
2194 Variable *Dest = Inst->getDest(); 2300 Variable *Dest = Inst->getDest();
2195 Type DestTy = Dest->getType(); 2301 Type DestTy = Dest->getType();
2196 Operand *SrcT = Inst->getTrueOperand(); 2302 Operand *SrcT = Inst->getTrueOperand();
2197 Operand *SrcF = Inst->getFalseOperand(); 2303 Operand *SrcF = Inst->getFalseOperand();
2198 Operand *Condition = Inst->getCondition(); 2304 Operand *Condition = Inst->getCondition();
2199 2305
(...skipping 440 matching lines...) Expand 10 before | Expand all | Expand 10 after
2640 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; 2746 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
2641 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 2747 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
2642 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; 2748 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
2643 } 2749 }
2644 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 2750 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
2645 // However, for compatibility with current NaCl LLVM, don't claim that. 2751 // However, for compatibility with current NaCl LLVM, don't claim that.
2646 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 2752 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
2647 } 2753 }
2648 2754
2649 } // end of namespace Ice 2755 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringX86Base.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698