OLD | NEW |
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 177 matching lines...)
188 I64PairRegisters[RegARM32::val] = isI64Pair; \ | 188 I64PairRegisters[RegARM32::val] = isI64Pair; \ |
189 Float32Registers[RegARM32::val] = isFP32; \ | 189 Float32Registers[RegARM32::val] = isFP32; \ |
190 Float64Registers[RegARM32::val] = isFP64; \ | 190 Float64Registers[RegARM32::val] = isFP64; \ |
191 VectorRegisters[RegARM32::val] = isVec128; \ | 191 VectorRegisters[RegARM32::val] = isVec128; \ |
192 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ | 192 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ |
193 for (SizeT RegAlias : alias_init) { \ | 193 for (SizeT RegAlias : alias_init) { \ |
194 assert(!RegisterAliases[RegARM32::val][RegAlias] && \ | 194 assert(!RegisterAliases[RegARM32::val][RegAlias] && \ |
195 "Duplicate alias for " #val); \ | 195 "Duplicate alias for " #val); \ |
196 RegisterAliases[RegARM32::val].set(RegAlias); \ | 196 RegisterAliases[RegARM32::val].set(RegAlias); \ |
197 } \ | 197 } \ |
198 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ | |
199 assert(RegisterAliases[RegARM32::val][RegARM32::val]); \ | 198 assert(RegisterAliases[RegARM32::val][RegARM32::val]); \ |
200 ScratchRegs[RegARM32::val] = scratch; | 199 ScratchRegs[RegARM32::val] = scratch; |
201 REGARM32_TABLE; | 200 REGARM32_TABLE; |
202 #undef X | 201 #undef X |
203 TypeToRegisterSet[IceType_void] = InvalidRegisters; | 202 TypeToRegisterSet[IceType_void] = InvalidRegisters; |
204 TypeToRegisterSet[IceType_i1] = IntegerRegisters; | 203 TypeToRegisterSet[IceType_i1] = IntegerRegisters; |
205 TypeToRegisterSet[IceType_i8] = IntegerRegisters; | 204 TypeToRegisterSet[IceType_i8] = IntegerRegisters; |
206 TypeToRegisterSet[IceType_i16] = IntegerRegisters; | 205 TypeToRegisterSet[IceType_i16] = IntegerRegisters; |
207 TypeToRegisterSet[IceType_i32] = IntegerRegisters; | 206 TypeToRegisterSet[IceType_i32] = IntegerRegisters; |
208 TypeToRegisterSet[IceType_i64] = I64PairRegisters; | 207 TypeToRegisterSet[IceType_i64] = I64PairRegisters; |
209 TypeToRegisterSet[IceType_f32] = Float32Registers; | 208 TypeToRegisterSet[IceType_f32] = Float32Registers; |
210 TypeToRegisterSet[IceType_f64] = Float64Registers; | 209 TypeToRegisterSet[IceType_f64] = Float64Registers; |
211 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; | 210 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; |
212 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; | 211 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; |
213 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; | 212 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; |
214 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; | 213 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; |
215 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; | 214 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; |
216 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; | 215 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; |
217 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; | 216 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; |
218 } | 217 } |
219 | 218 |
| 219 namespace { |
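 | // Propagates the GPR pair assigned to each Variable64On32 by the register |
 | // allocator down to its Lo/Hi halves, so the halves land in the first and |
 | // second register of the pair. |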
| 220 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { |
| 221 for (Variable *Var : Vars) { |
| 222 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); |
| 223 if (!Var64) { |
| 224 // This is not the variable we are looking for. |
| 225 continue; |
| 226 } |
| 227 assert(Var64->hasReg() || !Var64->mustHaveReg()); |
| 228 if (!Var64->hasReg()) { |
| 229 continue; |
| 230 } |
| 231 SizeT FirstReg = RegARM32::getI64PairFirstGPRNum(Var->getRegNum()); |
| 232 // This assumes little endian. |
| 233 Variable *Lo = Var64->getLo(); |
| 234 Variable *Hi = Var64->getHi(); |
| 235 assert(Lo->hasReg() == Hi->hasReg()); |
| 236 if (Lo->hasReg()) { |
| 237 continue; |
| 238 } |
| 239 Lo->setRegNum(FirstReg); |
| 240 Lo->setMustHaveReg(); |
| 241 Hi->setRegNum(FirstReg + 1); |
| 242 Hi->setMustHaveReg(); |
| 243 } |
| 244 } |
| 245 } // end of anonymous namespace |
| 246 |
220 void TargetARM32::translateO2() { | 247 void TargetARM32::translateO2() { |
221 TimerMarker T(TimerStack::TT_O2, Func); | 248 TimerMarker T(TimerStack::TT_O2, Func); |
222 | 249 |
223 // TODO(stichnot): share passes with X86? | 250 // TODO(stichnot): share passes with X86? |
224 // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 251 // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
225 | 252 |
226 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 253 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
227 // Lower Phi instructions. | 254 // Lower Phi instructions. |
228 Func->placePhiLoads(); | 255 Func->placePhiLoads(); |
229 if (Func->hasError()) | 256 if (Func->hasError()) |
(...skipping 47 matching lines...)
277 // Validate the live range computations. The expensive validation call is | 304 // Validate the live range computations. The expensive validation call is |
278 // deliberately only made when assertions are enabled. | 305 // deliberately only made when assertions are enabled. |
279 assert(Func->validateLiveness()); | 306 assert(Func->validateLiveness()); |
280 // The post-codegen dump is done here, after liveness analysis and associated | 307 // The post-codegen dump is done here, after liveness analysis and associated |
281 // cleanup, to make the dump cleaner and more useful. | 308 // cleanup, to make the dump cleaner and more useful. |
282 Func->dump("After initial ARM32 codegen"); | 309 Func->dump("After initial ARM32 codegen"); |
283 Func->getVMetadata()->init(VMK_All); | 310 Func->getVMetadata()->init(VMK_All); |
284 regAlloc(RAK_Global); | 311 regAlloc(RAK_Global); |
285 if (Func->hasError()) | 312 if (Func->hasError()) |
286 return; | 313 return; |
| 314 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
287 Func->dump("After linear scan regalloc"); | 315 Func->dump("After linear scan regalloc"); |
288 | 316 |
289 if (Ctx->getFlags().getPhiEdgeSplit()) { | 317 if (Ctx->getFlags().getPhiEdgeSplit()) { |
290 Func->advancedPhiLowering(); | 318 Func->advancedPhiLowering(); |
291 Func->dump("After advanced Phi lowering"); | 319 Func->dump("After advanced Phi lowering"); |
292 } | 320 } |
293 | 321 |
294 // Stack frame mapping. | 322 // Stack frame mapping. |
295 Func->genFrame(); | 323 Func->genFrame(); |
296 if (Func->hasError()) | 324 if (Func->hasError()) |
(...skipping 40 matching lines...)
337 Func->doArgLowering(); | 365 Func->doArgLowering(); |
338 | 366 |
339 Func->genCode(); | 367 Func->genCode(); |
340 if (Func->hasError()) | 368 if (Func->hasError()) |
341 return; | 369 return; |
342 Func->dump("After initial ARM32 codegen"); | 370 Func->dump("After initial ARM32 codegen"); |
343 | 371 |
344 regAlloc(RAK_InfOnly); | 372 regAlloc(RAK_InfOnly); |
345 if (Func->hasError()) | 373 if (Func->hasError()) |
346 return; | 374 return; |
| 375 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
347 Func->dump("After regalloc of infinite-weight variables"); | 376 Func->dump("After regalloc of infinite-weight variables"); |
348 | 377 |
349 Func->genFrame(); | 378 Func->genFrame(); |
350 if (Func->hasError()) | 379 if (Func->hasError()) |
351 return; | 380 return; |
352 Func->dump("After stack frame mapping"); | 381 Func->dump("After stack frame mapping"); |
353 | 382 |
354 legalizeStackSlots(); | 383 legalizeStackSlots(); |
355 if (Func->hasError()) | 384 if (Func->hasError()) |
356 return; | 385 return; |
(...skipping 252 matching lines...)
609 // value from the stack slot. | 638 // value from the stack slot. |
610 if (Arg->hasReg()) { | 639 if (Arg->hasReg()) { |
611 assert(Ty != IceType_i64); | 640 assert(Ty != IceType_i64); |
612 // This should be simple, just load the parameter off the stack using a nice | 641 // This should be simple, just load the parameter off the stack using a nice |
613 // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for | 642 // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for |
614 // fp types, cannot have an index register), so we legalize the memory | 643 // fp types, cannot have an index register), so we legalize the memory |
615 // operand instead. | 644 // operand instead. |
616 auto *Mem = OperandARM32Mem::create( | 645 auto *Mem = OperandARM32Mem::create( |
617 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( | 646 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( |
618 Ctx->getConstantInt32(Arg->getStackOffset()))); | 647 Ctx->getConstantInt32(Arg->getStackOffset()))); |
619 legalizeToReg(Mem, Arg->getRegNum()); | 648 _mov(Arg, legalizeToReg(Mem, Arg->getRegNum())); |
620 // This argument-copying instruction uses an explicit OperandARM32Mem | 649 // This argument-copying instruction uses an explicit OperandARM32Mem |
621 // operand instead of a Variable, so its fill-from-stack operation has to | 650 // operand instead of a Variable, so its fill-from-stack operation has to |
622 // be tracked separately for statistics. | 651 // be tracked separately for statistics. |
623 Ctx->statsUpdateFills(); | 652 Ctx->statsUpdateFills(); |
624 } | 653 } |
625 } | 654 } |
626 | 655 |
627 Type TargetARM32::stackSlotType() { return IceType_i32; } | 656 Type TargetARM32::stackSlotType() { return IceType_i32; } |
628 | 657 |
629 void TargetARM32::addProlog(CfgNode *Node) { | 658 void TargetARM32::addProlog(CfgNode *Node) { |
(...skipping 79 matching lines...)
709 if (UsesFramePointer) { | 738 if (UsesFramePointer) { |
710 CalleeSaves[RegARM32::Reg_fp] = true; | 739 CalleeSaves[RegARM32::Reg_fp] = true; |
711 assert(RegsUsed[RegARM32::Reg_fp] == false); | 740 assert(RegsUsed[RegARM32::Reg_fp] == false); |
712 RegsUsed[RegARM32::Reg_fp] = true; | 741 RegsUsed[RegARM32::Reg_fp] = true; |
713 } | 742 } |
714 if (!MaybeLeafFunc) { | 743 if (!MaybeLeafFunc) { |
715 CalleeSaves[RegARM32::Reg_lr] = true; | 744 CalleeSaves[RegARM32::Reg_lr] = true; |
716 RegsUsed[RegARM32::Reg_lr] = true; | 745 RegsUsed[RegARM32::Reg_lr] = true; |
717 } | 746 } |
718 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 747 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 748 if (RegARM32::isI64RegisterPair(i)) { |
| 749 // We don't save register pairs explicitly. Instead, we rely on the code |
| 750 // fake-defing/fake-using each register in the pair. |
| 751 continue; |
| 752 } |
719 if (CalleeSaves[i] && RegsUsed[i]) { | 753 if (CalleeSaves[i] && RegsUsed[i]) { |
720 // TODO(jvoung): do separate vpush for each floating point register | 754 // TODO(jvoung): do separate vpush for each floating point register |
721 // segment and += 4, or 8 depending on type. | 755 // segment and += 4, or 8 depending on type. |
722 ++NumCallee; | 756 ++NumCallee; |
723 PreservedRegsSizeBytes += 4; | 757 PreservedRegsSizeBytes += 4; |
724 GPRsToPreserve.push_back(getPhysicalRegister(i)); | 758 GPRsToPreserve.push_back(getPhysicalRegister(i)); |
725 } | 759 } |
726 } | 760 } |
727 Ctx->statsUpdateRegistersSaved(NumCallee); | 761 Ctx->statsUpdateRegistersSaved(NumCallee); |
728 if (!GPRsToPreserve.empty()) | 762 if (!GPRsToPreserve.empty()) |
(...skipping 148 matching lines...)
877 // Consider FP and LR as callee-save / used as needed. | 911 // Consider FP and LR as callee-save / used as needed. |
878 if (UsesFramePointer) { | 912 if (UsesFramePointer) { |
879 CalleeSaves[RegARM32::Reg_fp] = true; | 913 CalleeSaves[RegARM32::Reg_fp] = true; |
880 } | 914 } |
881 if (!MaybeLeafFunc) { | 915 if (!MaybeLeafFunc) { |
882 CalleeSaves[RegARM32::Reg_lr] = true; | 916 CalleeSaves[RegARM32::Reg_lr] = true; |
883 } | 917 } |
884 // Pop registers in ascending order just like push (instead of in reverse | 918 // Pop registers in ascending order just like push (instead of in reverse |
885 // order). | 919 // order). |
886 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 920 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 921 if (RegARM32::isI64RegisterPair(i)) { |
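 | // As in addProlog, register pairs are not restored explicitly; their |
 | // constituent registers are handled individually. |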
| 922 continue; |
| 923 } |
| 924 |
887 if (CalleeSaves[i] && RegsUsed[i]) { | 925 if (CalleeSaves[i] && RegsUsed[i]) { |
888 GPRsToRestore.push_back(getPhysicalRegister(i)); | 926 GPRsToRestore.push_back(getPhysicalRegister(i)); |
889 } | 927 } |
890 } | 928 } |
891 if (!GPRsToRestore.empty()) | 929 if (!GPRsToRestore.empty()) |
892 _pop(GPRsToRestore); | 930 _pop(GPRsToRestore); |
893 | 931 |
894 if (!Ctx->getFlags().getUseSandboxing()) | 932 if (!Ctx->getFlags().getUseSandboxing()) |
895 return; | 933 return; |
896 | 934 |
(...skipping 835 matching lines...)
1732 Operand *Src0 = Inst->getSrc(0); | 1770 Operand *Src0 = Inst->getSrc(0); |
1733 assert(Dest->getType() == Src0->getType()); | 1771 assert(Dest->getType() == Src0->getType()); |
1734 if (Dest->getType() == IceType_i64) { | 1772 if (Dest->getType() == IceType_i64) { |
1735 Src0 = legalizeUndef(Src0); | 1773 Src0 = legalizeUndef(Src0); |
1736 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 1774 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
1737 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 1775 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
1738 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1776 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
1739 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1777 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1740 Variable *T_Lo = makeReg(IceType_i32); | 1778 Variable *T_Lo = makeReg(IceType_i32); |
1741 Variable *T_Hi = makeReg(IceType_i32); | 1779 Variable *T_Hi = makeReg(IceType_i32); |
| 1780 |
1742 _mov(T_Lo, Src0Lo); | 1781 _mov(T_Lo, Src0Lo); |
1743 _mov(DestLo, T_Lo); | 1782 _mov(DestLo, T_Lo); |
1744 _mov(T_Hi, Src0Hi); | 1783 _mov(T_Hi, Src0Hi); |
1745 _mov(DestHi, T_Hi); | 1784 _mov(DestHi, T_Hi); |
1746 } else { | 1785 } else { |
1747 Operand *NewSrc; | 1786 Operand *NewSrc; |
1748 if (Dest->hasReg()) { | 1787 if (Dest->hasReg()) { |
1749 // If Dest already has a physical register, then legalize the Src operand | 1788 // If Dest already has a physical register, then legalize the Src operand |
1750 // into a Variable with the same register assignment. This especially | 1789 // into a Variable with the same register assignment. This especially |
1751 // helps allow the use of Flex operands. | 1790 // helps allow the use of Flex operands. |
(...skipping 512 matching lines...)
2264 case IceType_i64: { | 2303 case IceType_i64: { |
2265 // t0, t1 <- src0 | 2304 // t0, t1 <- src0 |
2266 // dest[31..0] = t0 | 2305 // dest[31..0] = t0 |
2267 // dest[63..32] = t1 | 2306 // dest[63..32] = t1 |
2268 assert(Src0->getType() == IceType_f64); | 2307 assert(Src0->getType() == IceType_f64); |
2269 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | 2308 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
2270 T->initHiLo(Func); | 2309 T->initHiLo(Func); |
2271 configureBitcastTemporary(T); | 2310 configureBitcastTemporary(T); |
2272 Variable *Src0R = legalizeToReg(Src0); | 2311 Variable *Src0R = legalizeToReg(Src0); |
2273 _mov(T, Src0R); | 2312 _mov(T, Src0R); |
2274 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | 2313 lowerAssign(InstAssign::create(Func, Dest, T)); |
2275 lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo())); | |
2276 lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi())); | |
2277 break; | 2314 break; |
2278 } | 2315 } |
2279 case IceType_f64: { | 2316 case IceType_f64: { |
2280 // T0 <- lo(src) | 2317 // T0 <- lo(src) |
2281 // T1 <- hi(src) | 2318 // T1 <- hi(src) |
2282 // vmov T2, T0, T1 | 2319 // vmov T2, T0, T1 |
2283 // Dest <- T2 | 2320 // Dest <- T2 |
2284 assert(Src0->getType() == IceType_i64); | 2321 assert(Src0->getType() == IceType_i64); |
| 2322 Variable *T = makeReg(DestType); |
2285 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | 2323 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
2286 Src64->initHiLo(Func); | 2324 Src64->initHiLo(Func); |
2287 configureBitcastTemporary(Src64); | 2325 configureBitcastTemporary(Src64); |
2288 lowerAssign(InstAssign::create(Func, Src64, Src0)); | 2326 lowerAssign(InstAssign::create(Func, Src64, Src0)); |
2289 Variable *T = makeReg(IceType_f64); | |
2290 _mov(T, Src64); | 2327 _mov(T, Src64); |
2291 lowerAssign(InstAssign::create(Func, Dest, T)); | 2328 lowerAssign(InstAssign::create(Func, Dest, T)); |
2292 break; | 2329 break; |
2293 } | 2330 } |
2294 case IceType_v4i1: | 2331 case IceType_v4i1: |
2295 case IceType_v8i1: | 2332 case IceType_v8i1: |
2296 case IceType_v16i1: | 2333 case IceType_v16i1: |
2297 case IceType_v8i16: | 2334 case IceType_v8i16: |
2298 case IceType_v16i8: | 2335 case IceType_v16i8: |
2299 case IceType_v4f32: | 2336 case IceType_v4f32: |
(...skipping 230 matching lines...)
2530 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); | 2567 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); |
2531 _mov(Dest, T); | 2568 _mov(Dest, T); |
2532 return; | 2569 return; |
2533 } | 2570 } |
2534 | 2571 |
2535 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { | 2572 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { |
2536 (void)Inst; | 2573 (void)Inst; |
2537 UnimplementedError(Func->getContext()->getFlags()); | 2574 UnimplementedError(Func->getContext()->getFlags()); |
2538 } | 2575 } |
2539 | 2576 |
| 2577 namespace { |
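 | // Returns the memory-order argument as a compile-time constant, or |
 | // Intrinsics::MemoryOrderInvalid if it is not a ConstantInteger32. |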
| 2578 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 2579 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 2580 return Integer->getValue(); |
| 2581 return Intrinsics::MemoryOrderInvalid; |
| 2582 } |
| 2583 } // end of anonymous namespace |
| 2584 |
| 2585 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
| 2586 Operand *Ptr, Operand *Val) { |
| 2587 // retry: |
| 2588 // ldrex contents, [addr] |
| 2589 // op tmp, contents, operand |
| 2590 // strex success, tmp, [addr] |
| 2591 // jne retry |
| 2592 // fake-use(addr, operand) @ prevents undesirable clobbering. |
| 2593 // mov dest, contents |
| 2594 assert(Dest != nullptr); |
| 2595 Type DestTy = Dest->getType(); |
| 2596 (void)Ptr; |
| 2597 (void)Val; |
| 2598 |
| 2599 OperandARM32Mem *Mem; |
| 2600 Variable *PtrContentsReg; |
| 2601 Variable *PtrContentsHiReg; |
| 2602 Variable *PtrContentsLoReg; |
| 2603 Variable *Value = Func->makeVariable(DestTy); |
| 2604 Variable *ValueReg; |
| 2605 Variable *ValueHiReg; |
| 2606 Variable *ValueLoReg; |
| 2607 Variable *Success = makeReg(IceType_i32); |
| 2608 Variable *TmpReg; |
| 2609 Variable *TmpHiReg; |
| 2610 Variable *TmpLoReg; |
| 2611 Operand *_0 = Ctx->getConstantZero(IceType_i32); |
| 2612 InstARM32Label *Retry = InstARM32Label::create(Func, this); |
| 2613 |
| 2614 if (DestTy == IceType_i64) { |
| 2615 Variable64On32 *PtrContentsReg64 = makeI64RegPair(); |
| 2616 PtrContentsHiReg = PtrContentsReg64->getHi(); |
| 2617 PtrContentsLoReg = PtrContentsReg64->getLo(); |
| 2618 PtrContentsReg = PtrContentsReg64; |
| 2619 |
| 2620 llvm::cast<Variable64On32>(Value)->initHiLo(Func); |
| 2621 Variable64On32 *ValueReg64 = makeI64RegPair(); |
| 2622 ValueHiReg = ValueReg64->getHi(); |
| 2623 ValueLoReg = ValueReg64->getLo(); |
| 2624 ValueReg = ValueReg64; |
| 2625 |
| 2626 Variable64On32 *TmpReg64 = makeI64RegPair(); |
| 2627 TmpHiReg = TmpReg64->getHi(); |
| 2628 TmpLoReg = TmpReg64->getLo(); |
| 2629 TmpReg = TmpReg64; |
| 2630 } else { |
| 2631 PtrContentsReg = makeReg(DestTy); |
| 2632 PtrContentsHiReg = nullptr; |
| 2633 PtrContentsLoReg = PtrContentsReg; |
| 2634 |
| 2635 ValueReg = makeReg(DestTy); |
| 2636 ValueHiReg = nullptr; |
| 2637 ValueLoReg = ValueReg; |
| 2638 |
| 2639 TmpReg = makeReg(DestTy); |
| 2640 TmpHiReg = nullptr; |
| 2641 TmpLoReg = TmpReg; |
| 2642 } |
| 2643 |
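 | // For i64, the assignment below only defines Value's Lo/Hi halves, so a |
 | // fake-def of the 64-bit container keeps the liveness analysis consistent. |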
| 2644 if (DestTy == IceType_i64) { |
| 2645 Context.insert(InstFakeDef::create(Func, Value)); |
| 2646 } |
| 2647 lowerAssign(InstAssign::create(Func, Value, Val)); |
| 2648 |
| 2649 Variable *PtrVar = Func->makeVariable(IceType_i32); |
| 2650 lowerAssign(InstAssign::create(Func, PtrVar, Ptr)); |
| 2651 |
| 2652 _dmb(); |
| 2653 Context.insert(Retry); |
| 2654 Mem = formMemoryOperand(PtrVar, DestTy); |
| 2655 if (DestTy == IceType_i64) { |
| 2656 Context.insert(InstFakeDef::create(Func, ValueReg, Value)); |
| 2657 } |
| 2658 lowerAssign(InstAssign::create(Func, ValueReg, Value)); |
| 2659 if (DestTy == IceType_i8 || DestTy == IceType_i16) { |
| 2660 _uxt(ValueReg, ValueReg); |
| 2661 } |
| 2662 _ldrex(PtrContentsReg, Mem); |
| 2663 |
| 2664 if (DestTy == IceType_i64) { |
| 2665 Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg)); |
| 2666 } |
| 2667 switch (Operation) { |
| 2668 default: |
| 2669 Func->setError("Unknown AtomicRMW operation"); |
| 2670 return; |
| 2671 case Intrinsics::AtomicAdd: |
| 2672 if (DestTy == IceType_i64) { |
| 2673 _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| 2674 _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| 2675 } else { |
| 2676 _add(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| 2677 } |
| 2678 break; |
| 2679 case Intrinsics::AtomicSub: |
| 2680 if (DestTy == IceType_i64) { |
| 2681 _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| 2682 _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| 2683 } else { |
| 2684 _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| 2685 } |
| 2686 break; |
| 2687 case Intrinsics::AtomicOr: |
| 2688 _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| 2689 if (DestTy == IceType_i64) { |
| 2690 _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| 2691 } |
| 2692 break; |
| 2693 case Intrinsics::AtomicAnd: |
| 2694 _and(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| 2695 if (DestTy == IceType_i64) { |
| 2696 _and(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| 2697 } |
| 2698 break; |
| 2699 case Intrinsics::AtomicXor: |
| 2700 _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
| 2701 if (DestTy == IceType_i64) { |
| 2702 _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
| 2703 } |
| 2704 break; |
| 2705 case Intrinsics::AtomicExchange: |
| 2706 _mov(TmpLoReg, ValueLoReg); |
| 2707 if (DestTy == IceType_i64) { |
| 2708 _mov(TmpHiReg, ValueHiReg); |
| 2709 } |
| 2710 break; |
| 2711 } |
| 2712 _strex(Success, TmpReg, Mem); |
| 2713 _cmp(Success, _0); |
| 2714 _br(Retry, CondARM32::NE); |
| 2715 |
| 2716 // The following fake-uses ensure that Subzero will not clobber them in the |
| 2717 // load-linked/store-conditional loop above. We might have to spill them, but |
 | 2718 // spilling is preferable to incorrect behavior. |
| 2719 Context.insert(InstFakeUse::create(Func, PtrVar)); |
| 2720 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) { |
| 2721 Context.insert(InstFakeUse::create(Func, Value64->getHi())); |
| 2722 Context.insert(InstFakeUse::create(Func, Value64->getLo())); |
| 2723 } else { |
| 2724 Context.insert(InstFakeUse::create(Func, Value)); |
| 2725 } |
| 2726 _dmb(); |
| 2727 if (DestTy == IceType_i8 || DestTy == IceType_i16) { |
| 2728 _uxt(PtrContentsReg, PtrContentsReg); |
| 2729 } |
| 2730 |
| 2731 if (DestTy == IceType_i64) { |
| 2732 Context.insert(InstFakeUse::create(Func, PtrContentsReg)); |
| 2733 } |
| 2734 lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg)); |
| 2735 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) { |
| 2736 Context.insert(InstFakeUse::create(Func, Dest64->getLo())); |
| 2737 Context.insert(InstFakeUse::create(Func, Dest64->getHi())); |
| 2738 } else { |
| 2739 Context.insert(InstFakeUse::create(Func, Dest)); |
| 2740 } |
| 2741 } |
| 2742 |
2540 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 2743 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
2541 switch (Instr->getIntrinsicInfo().ID) { | 2744 Variable *Dest = Instr->getDest(); |
| 2745 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void; |
| 2746 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID; |
| 2747 switch (ID) { |
| 2748 case Intrinsics::AtomicFence: |
| 2749 case Intrinsics::AtomicFenceAll: |
| 2750 assert(Dest == nullptr); |
| 2751 _dmb(); |
| 2752 return; |
| 2753 case Intrinsics::AtomicIsLockFree: { |
| 2754 Operand *ByteSize = Instr->getArg(0); |
| 2755 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize); |
| 2756 if (CI == nullptr) { |
| 2757 // The PNaCl ABI requires the byte size to be a compile-time constant. |
| 2758 Func->setError("AtomicIsLockFree byte size should be compile-time const"); |
| 2759 return; |
| 2760 } |
| 2761 static constexpr int32_t NotLockFree = 0; |
| 2762 static constexpr int32_t LockFree = 1; |
| 2763 int32_t Result = NotLockFree; |
| 2764 switch (CI->getValue()) { |
| 2765 case 1: |
| 2766 case 2: |
| 2767 case 4: |
| 2768 case 8: |
| 2769 Result = LockFree; |
| 2770 break; |
| 2771 } |
| 2772 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result))); |
| 2773 return; |
| 2774 } |
| 2775 case Intrinsics::AtomicLoad: { |
| 2776 assert(isScalarIntegerType(DestTy)); |
| 2777 // We require the memory address to be naturally aligned. Given that is the |
 | 2778 // case, normal loads are atomic. |
| 2779 if (!Intrinsics::isMemoryOrderValid( |
| 2780 ID, getConstantMemoryOrder(Instr->getArg(1)))) { |
| 2781 Func->setError("Unexpected memory ordering for AtomicLoad"); |
| 2782 return; |
| 2783 } |
| 2784 Variable *T; |
| 2785 |
| 2786 if (DestTy == IceType_i64) { |
 | 2787 // ldrex is the only ARM instruction that is guaranteed to load a 64-bit |
| 2788 // integer atomically. Everything else works with a regular ldr. |
| 2789 T = makeI64RegPair(); |
| 2790 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); |
| 2791 } else { |
| 2792 T = makeReg(DestTy); |
| 2793 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); |
| 2794 } |
| 2795 _dmb(); |
| 2796 lowerAssign(InstAssign::create(Func, Dest, T)); |
| 2797 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
| 2798 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert |
| 2799 // the FakeUse on the last-inserted instruction's dest. |
| 2800 Context.insert( |
| 2801 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| 2802 return; |
| 2803 } |
| 2804 case Intrinsics::AtomicStore: { |
| 2805 // We require the memory address to be naturally aligned. Given that is the |
 | 2806 // case, normal stores are atomic. |
| 2807 if (!Intrinsics::isMemoryOrderValid( |
| 2808 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
| 2809 Func->setError("Unexpected memory ordering for AtomicStore"); |
| 2810 return; |
| 2811 } |
| 2812 Operand *Value = Instr->getArg(0); |
| 2813 Type ValueTy = Value->getType(); |
| 2814 assert(isScalarIntegerType(ValueTy)); |
| 2815 Operand *Addr = Instr->getArg(1); |
| 2816 |
| 2817 if (ValueTy == IceType_i64) { |
 | 2818 // Atomic 64-bit stores require a load-linked/store-conditional loop using |
 | 2819 // ldrexd and strexd. The lowered code is: |
| 2820 // |
| 2821 // retry: |
| 2822 // ldrexd t.lo, t.hi, [addr] |
| 2823 // strexd success, value.lo, value.hi, [addr] |
| 2824 // cmp success, #0 |
| 2825 // bne retry |
| 2826 // fake-use(addr, value.lo, value.hi) |
| 2827 // |
| 2828 // The fake-use is needed to prevent those variables from being clobbered |
| 2829 // in the loop (which will happen under register pressure.) |
| 2830 Variable64On32 *Tmp = makeI64RegPair(); |
| 2831 Variable64On32 *ValueVar = |
| 2832 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| 2833 Variable *AddrVar = makeReg(IceType_i32); |
| 2834 Variable *Success = makeReg(IceType_i32); |
| 2835 OperandARM32Mem *Mem; |
| 2836 Operand *_0 = Ctx->getConstantZero(IceType_i32); |
| 2837 InstARM32Label *Retry = InstARM32Label::create(Func, this); |
| 2838 Variable64On32 *NewReg = makeI64RegPair(); |
| 2839 ValueVar->initHiLo(Func); |
| 2840 ValueVar->mustNotHaveReg(); |
| 2841 |
| 2842 _dmb(); |
| 2843 lowerAssign(InstAssign::create(Func, ValueVar, Value)); |
| 2844 lowerAssign(InstAssign::create(Func, AddrVar, Addr)); |
| 2845 |
| 2846 Context.insert(Retry); |
| 2847 Context.insert(InstFakeDef::create(Func, NewReg)); |
| 2848 lowerAssign(InstAssign::create(Func, NewReg, ValueVar)); |
| 2849 Mem = formMemoryOperand(AddrVar, IceType_i64); |
| 2850 _ldrex(Tmp, Mem); |
 | 2851 // This fake-use prevents the ldrex from being dead-code eliminated, |
| 2852 // while also keeping liveness happy about all defs being used. |
| 2853 Context.insert( |
| 2854 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| 2855 _strex(Success, NewReg, Mem); |
| 2856 _cmp(Success, _0); |
| 2857 _br(Retry, CondARM32::NE); |
| 2858 |
| 2859 Context.insert(InstFakeUse::create(Func, ValueVar->getLo())); |
| 2860 Context.insert(InstFakeUse::create(Func, ValueVar->getHi())); |
| 2861 Context.insert(InstFakeUse::create(Func, AddrVar)); |
| 2862 _dmb(); |
| 2863 return; |
| 2864 } |
 | 2865 // non-64-bit stores are atomic as long as the address is aligned. This |
| 2866 // is PNaCl, so addresses are aligned. |
| 2867 Variable *T = makeReg(ValueTy); |
| 2868 |
| 2869 _dmb(); |
| 2870 lowerAssign(InstAssign::create(Func, T, Value)); |
| 2871 _str(T, formMemoryOperand(Addr, ValueTy)); |
| 2872 _dmb(); |
| 2873 return; |
| 2874 } |
2542 case Intrinsics::AtomicCmpxchg: { | 2875 case Intrinsics::AtomicCmpxchg: { |
2543 UnimplementedError(Func->getContext()->getFlags()); | 2876 // The initial lowering for cmpxchg was: |
2544 return; | 2877 // |
2545 } | 2878 // retry: |
2546 case Intrinsics::AtomicFence: | 2879 // ldrex tmp, [addr] |
2547 UnimplementedError(Func->getContext()->getFlags()); | 2880 // cmp tmp, expected |
2548 return; | 2881 // mov expected, tmp |
2549 case Intrinsics::AtomicFenceAll: | 2882 // jne retry |
2550 // NOTE: FenceAll should prevent and load/store from being moved across the | 2883 // strex success, new, [addr] |
2551 // fence (both atomic and non-atomic). The InstARM32Mfence instruction is | 2884 // cmp success, #0 |
2552 // currently marked coarsely as "HasSideEffects". | 2885 // bne retry |
2553 UnimplementedError(Func->getContext()->getFlags()); | 2886 // mov dest, expected |
2554 return; | 2887 // |
2555 case Intrinsics::AtomicIsLockFree: { | 2888 // Besides requiring two branches, that lowering could also potentially |
2556 UnimplementedError(Func->getContext()->getFlags()); | 2889 // write to memory (in mov expected, tmp) unless we were OK with increasing |
2557 return; | 2890 // the register pressure and requiring expected to be an infinite-weight |
2558 } | 2891 // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through |
2559 case Intrinsics::AtomicLoad: { | 2892 // careful rewriting, and thanks to predication, we now implement the |
2560 UnimplementedError(Func->getContext()->getFlags()); | 2893 // lowering as: |
2561 return; | 2894 // |
2562 } | 2895 // retry: |
2563 case Intrinsics::AtomicRMW: | 2896 // ldrex tmp, [addr] |
2564 UnimplementedError(Func->getContext()->getFlags()); | 2897 // cmp tmp, expected |
2565 return; | 2898 // strexeq success, new, [addr] |
2566 case Intrinsics::AtomicStore: { | 2899 // movne expected, tmp |
2567 UnimplementedError(Func->getContext()->getFlags()); | 2900 // cmpeq success, #0 |
| 2901 // bne retry |
| 2902 // mov dest, expected |
| 2903 // |
| 2904 // Predication lets us move the strex ahead of the mov expected, tmp, which |
| 2905 // allows tmp to be a non-infinite weight temporary. We wanted to avoid |
| 2906 // writing to memory between ldrex and strex because, even though most times |
| 2907 // that would cause no issues, if any interleaving memory write aliased |
 | 2908 // [addr] then we would have undefined behavior. Undefined behavior isn't |
| 2909 // cool, so we try to avoid it. See the "Synchronization and semaphores" |
| 2910 // section of the "ARM Architecture Reference Manual." |
| 2911 |
| 2912 assert(isScalarIntegerType(DestTy)); |
| 2913 // We require the memory address to be naturally aligned. Given that is the |
 | 2914 // case, normal loads are atomic. |
| 2915 if (!Intrinsics::isMemoryOrderValid( |
| 2916 ID, getConstantMemoryOrder(Instr->getArg(3)), |
| 2917 getConstantMemoryOrder(Instr->getArg(4)))) { |
| 2918 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
| 2919 return; |
| 2920 } |
| 2921 |
| 2922 OperandARM32Mem *Mem; |
| 2923 Variable *TmpReg; |
| 2924 Variable *Expected, *ExpectedReg; |
| 2925 Variable *New, *NewReg; |
| 2926 Variable *Success = makeReg(IceType_i32); |
| 2927 Operand *_0 = Ctx->getConstantZero(IceType_i32); |
| 2928 InstARM32Label *Retry = InstARM32Label::create(Func, this); |
| 2929 |
| 2930 if (DestTy == IceType_i64) { |
| 2931 Variable64On32 *TmpReg64 = makeI64RegPair(); |
| 2932 Variable64On32 *New64 = |
| 2933 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| 2934 Variable64On32 *NewReg64 = makeI64RegPair(); |
| 2935 Variable64On32 *Expected64 = |
| 2936 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| 2937 Variable64On32 *ExpectedReg64 = makeI64RegPair(); |
| 2938 |
| 2939 New64->initHiLo(Func); |
| 2940 New64->mustNotHaveReg(); |
| 2941 Expected64->initHiLo(Func); |
| 2942 Expected64->mustNotHaveReg(); |
| 2943 |
| 2944 TmpReg = TmpReg64; |
| 2945 New = New64; |
| 2946 NewReg = NewReg64; |
| 2947 Expected = Expected64; |
| 2948 ExpectedReg = ExpectedReg64; |
| 2949 } else { |
| 2950 TmpReg = makeReg(DestTy); |
| 2951 New = Func->makeVariable(DestTy); |
| 2952 NewReg = makeReg(DestTy); |
| 2953 Expected = Func->makeVariable(DestTy); |
| 2954 ExpectedReg = makeReg(DestTy); |
| 2955 } |
| 2956 |
| 2957 Mem = formMemoryOperand(Instr->getArg(0), DestTy); |
| 2958 if (DestTy == IceType_i64) { |
| 2959 Context.insert(InstFakeDef::create(Func, Expected)); |
| 2960 } |
| 2961 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); |
| 2962 if (DestTy == IceType_i64) { |
| 2963 Context.insert(InstFakeDef::create(Func, New)); |
| 2964 } |
| 2965 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); |
| 2966 _dmb(); |
| 2967 |
| 2968 Context.insert(Retry); |
| 2969 if (DestTy == IceType_i64) { |
| 2970 Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected)); |
| 2971 } |
| 2972 lowerAssign(InstAssign::create(Func, ExpectedReg, Expected)); |
| 2973 if (DestTy == IceType_i64) { |
| 2974 Context.insert(InstFakeDef::create(Func, NewReg, New)); |
| 2975 } |
| 2976 lowerAssign(InstAssign::create(Func, NewReg, New)); |
| 2977 |
| 2978 _ldrex(TmpReg, Mem); |
| 2979 Context.insert( |
| 2980 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| 2981 if (DestTy == IceType_i64) { |
| 2982 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); |
| 2983 auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg); |
| 2984 // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's |
| 2985 // keep liveness happy, shall we? |
| 2986 Context.insert(InstFakeUse::create(Func, TmpReg)); |
| 2987 Context.insert(InstFakeUse::create(Func, ExpectedReg)); |
| 2988 _cmp(TmpReg64->getHi(), ExpectedReg64->getHi()); |
| 2989 _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ); |
| 2990 } else { |
| 2991 _cmp(TmpReg, ExpectedReg); |
| 2992 } |
| 2993 _strex(Success, NewReg, Mem, CondARM32::EQ); |
| 2994 if (DestTy == IceType_i64) { |
| 2995 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); |
| 2996 auto *Expected64 = llvm::cast<Variable64On32>(Expected); |
| 2997 _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE); |
| 2998 _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE); |
| 2999 auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg); |
| 3000 Context.insert(FakeDef); |
| 3001 FakeDef->setDestRedefined(); |
| 3002 } else { |
| 3003 _mov_redefined(Expected, TmpReg, CondARM32::NE); |
| 3004 } |
| 3005 _cmp(Success, _0, CondARM32::EQ); |
| 3006 _br(Retry, CondARM32::NE); |
| 3007 _dmb(); |
| 3008 lowerAssign(InstAssign::create(Func, Dest, Expected)); |
| 3009 Context.insert(InstFakeUse::create(Func, Expected)); |
| 3010 if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) { |
| 3011 Context.insert(InstFakeUse::create(Func, New64->getLo())); |
| 3012 Context.insert(InstFakeUse::create(Func, New64->getHi())); |
| 3013 } else { |
| 3014 Context.insert(InstFakeUse::create(Func, New)); |
| 3015 } |
| 3016 return; |
| 3017 } |
| 3018 case Intrinsics::AtomicRMW: { |
| 3019 if (!Intrinsics::isMemoryOrderValid( |
| 3020 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
| 3021 Func->setError("Unexpected memory ordering for AtomicRMW"); |
| 3022 return; |
| 3023 } |
| 3024 lowerAtomicRMW( |
| 3025 Dest, static_cast<uint32_t>( |
| 3026 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), |
| 3027 Instr->getArg(1), Instr->getArg(2)); |
2568 return; | 3028 return; |
2569 } | 3029 } |
2570 case Intrinsics::Bswap: { | 3030 case Intrinsics::Bswap: { |
2571 Variable *Dest = Instr->getDest(); | |
2572 Operand *Val = Instr->getArg(0); | 3031 Operand *Val = Instr->getArg(0); |
2573 Type Ty = Val->getType(); | 3032 Type Ty = Val->getType(); |
2574 if (Ty == IceType_i64) { | 3033 if (Ty == IceType_i64) { |
2575 Val = legalizeUndef(Val); | 3034 Val = legalizeUndef(Val); |
2576 Variable *Val_Lo = legalizeToReg(loOperand(Val)); | 3035 Variable *Val_Lo = legalizeToReg(loOperand(Val)); |
2577 Variable *Val_Hi = legalizeToReg(hiOperand(Val)); | 3036 Variable *Val_Hi = legalizeToReg(hiOperand(Val)); |
2578 Variable *T_Lo = makeReg(IceType_i32); | 3037 Variable *T_Lo = makeReg(IceType_i32); |
2579 Variable *T_Hi = makeReg(IceType_i32); | 3038 Variable *T_Hi = makeReg(IceType_i32); |
2580 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3039 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2581 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3040 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2582 _rev(T_Lo, Val_Lo); | 3041 _rev(T_Lo, Val_Lo); |
2583 _rev(T_Hi, Val_Hi); | 3042 _rev(T_Hi, Val_Hi); |
2584 _mov(DestLo, T_Hi); | 3043 _mov(DestLo, T_Hi); |
2585 _mov(DestHi, T_Lo); | 3044 _mov(DestHi, T_Lo); |
2586 } else { | 3045 } else { |
2587 assert(Ty == IceType_i32 || Ty == IceType_i16); | 3046 assert(Ty == IceType_i32 || Ty == IceType_i16); |
2588 Variable *ValR = legalizeToReg(Val); | 3047 Variable *ValR = legalizeToReg(Val); |
2589 Variable *T = makeReg(Ty); | 3048 Variable *T = makeReg(Ty); |
2590 _rev(T, ValR); | 3049 _rev(T, ValR); |
2591 if (Val->getType() == IceType_i16) { | 3050 if (Val->getType() == IceType_i16) { |
2592 Operand *Sixteen = | 3051 Operand *Sixteen = |
2593 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex); | 3052 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex); |
2594 _lsr(T, T, Sixteen); | 3053 _lsr(T, T, Sixteen); |
2595 } | 3054 } |
2596 _mov(Dest, T); | 3055 _mov(Dest, T); |
2597 } | 3056 } |
2598 return; | 3057 return; |
2599 } | 3058 } |
2600 case Intrinsics::Ctpop: { | 3059 case Intrinsics::Ctpop: { |
2601 Variable *Dest = Instr->getDest(); | |
2602 Operand *Val = Instr->getArg(0); | 3060 Operand *Val = Instr->getArg(0); |
2603 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) | 3061 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) |
2604 ? H_call_ctpop_i32 | 3062 ? H_call_ctpop_i32 |
2605 : H_call_ctpop_i64, | 3063 : H_call_ctpop_i64, |
2606 Dest, 1); | 3064 Dest, 1); |
2607 Call->addArg(Val); | 3065 Call->addArg(Val); |
2608 lowerCall(Call); | 3066 lowerCall(Call); |
2609 // The popcount helpers always return 32-bit values, while the intrinsic's | 3067 // The popcount helpers always return 32-bit values, while the intrinsic's |
2610 // signature matches some 64-bit platform's native instructions and expects | 3068 // signature matches some 64-bit platform's native instructions and expects |
2611 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in | 3069 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in |
(...skipping 14 matching lines...)
2626 Operand *Val = Instr->getArg(0); | 3084 Operand *Val = Instr->getArg(0); |
2627 Variable *ValLoR; | 3085 Variable *ValLoR; |
2628 Variable *ValHiR = nullptr; | 3086 Variable *ValHiR = nullptr; |
2629 if (Val->getType() == IceType_i64) { | 3087 if (Val->getType() == IceType_i64) { |
2630 Val = legalizeUndef(Val); | 3088 Val = legalizeUndef(Val); |
2631 ValLoR = legalizeToReg(loOperand(Val)); | 3089 ValLoR = legalizeToReg(loOperand(Val)); |
2632 ValHiR = legalizeToReg(hiOperand(Val)); | 3090 ValHiR = legalizeToReg(hiOperand(Val)); |
2633 } else { | 3091 } else { |
2634 ValLoR = legalizeToReg(Val); | 3092 ValLoR = legalizeToReg(Val); |
2635 } | 3093 } |
2636 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); | 3094 lowerCLZ(Dest, ValLoR, ValHiR); |
2637 return; | 3095 return; |
2638 } | 3096 } |
2639 case Intrinsics::Cttz: { | 3097 case Intrinsics::Cttz: { |
2640 // Essentially like Clz, but reverse the bits first. | 3098 // Essentially like Clz, but reverse the bits first. |
2641 Operand *Val = Instr->getArg(0); | 3099 Operand *Val = Instr->getArg(0); |
2642 Variable *ValLoR; | 3100 Variable *ValLoR; |
2643 Variable *ValHiR = nullptr; | 3101 Variable *ValHiR = nullptr; |
2644 if (Val->getType() == IceType_i64) { | 3102 if (Val->getType() == IceType_i64) { |
2645 Val = legalizeUndef(Val); | 3103 Val = legalizeUndef(Val); |
2646 ValLoR = legalizeToReg(loOperand(Val)); | 3104 ValLoR = legalizeToReg(loOperand(Val)); |
2647 ValHiR = legalizeToReg(hiOperand(Val)); | 3105 ValHiR = legalizeToReg(hiOperand(Val)); |
2648 Variable *TLo = makeReg(IceType_i32); | 3106 Variable *TLo = makeReg(IceType_i32); |
2649 Variable *THi = makeReg(IceType_i32); | 3107 Variable *THi = makeReg(IceType_i32); |
2650 _rbit(TLo, ValLoR); | 3108 _rbit(TLo, ValLoR); |
2651 _rbit(THi, ValHiR); | 3109 _rbit(THi, ValHiR); |
2652 ValLoR = THi; | 3110 ValLoR = THi; |
2653 ValHiR = TLo; | 3111 ValHiR = TLo; |
2654 } else { | 3112 } else { |
2655 ValLoR = legalizeToReg(Val); | 3113 ValLoR = legalizeToReg(Val); |
2656 Variable *T = makeReg(IceType_i32); | 3114 Variable *T = makeReg(IceType_i32); |
2657 _rbit(T, ValLoR); | 3115 _rbit(T, ValLoR); |
2658 ValLoR = T; | 3116 ValLoR = T; |
2659 } | 3117 } |
2660 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); | 3118 lowerCLZ(Dest, ValLoR, ValHiR); |
2661 return; | 3119 return; |
2662 } | 3120 } |
2663 case Intrinsics::Fabs: { | 3121 case Intrinsics::Fabs: { |
2664 Variable *Dest = Instr->getDest(); | |
2665 Type DestTy = Dest->getType(); | 3122 Type DestTy = Dest->getType(); |
2666 Variable *T = makeReg(DestTy); | 3123 Variable *T = makeReg(DestTy); |
2667 if (isVectorType(DestTy)) { | 3124 if (isVectorType(DestTy)) { |
2668 // Add a fake def to keep liveness consistent in the meantime. | 3125 // Add a fake def to keep liveness consistent in the meantime. |
2669 Context.insert(InstFakeDef::create(Func, T)); | 3126 Context.insert(InstFakeDef::create(Func, T)); |
2670 _mov(Instr->getDest(), T); | 3127 _mov(Dest, T); |
2671 UnimplementedError(Func->getContext()->getFlags()); | 3128 UnimplementedError(Func->getContext()->getFlags()); |
2672 return; | 3129 return; |
2673 } | 3130 } |
2674 _vabs(T, legalizeToReg(Instr->getArg(0))); | 3131 _vabs(T, legalizeToReg(Instr->getArg(0))); |
2675 _mov(Dest, T); | 3132 _mov(Dest, T); |
2676 return; | 3133 return; |
2677 } | 3134 } |
2678 case Intrinsics::Longjmp: { | 3135 case Intrinsics::Longjmp: { |
2679 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); | 3136 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); |
2680 Call->addArg(Instr->getArg(0)); | 3137 Call->addArg(Instr->getArg(0)); |
(...skipping 33 matching lines...)
2714 Call->addArg(Instr->getArg(0)); | 3171 Call->addArg(Instr->getArg(0)); |
2715 Call->addArg(ValExt); | 3172 Call->addArg(ValExt); |
2716 Call->addArg(Instr->getArg(2)); | 3173 Call->addArg(Instr->getArg(2)); |
2717 lowerCall(Call); | 3174 lowerCall(Call); |
2718 return; | 3175 return; |
2719 } | 3176 } |
2720 case Intrinsics::NaClReadTP: { | 3177 case Intrinsics::NaClReadTP: { |
2721 if (Ctx->getFlags().getUseSandboxing()) { | 3178 if (Ctx->getFlags().getUseSandboxing()) { |
2722 UnimplementedError(Func->getContext()->getFlags()); | 3179 UnimplementedError(Func->getContext()->getFlags()); |
2723 } else { | 3180 } else { |
2724 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); | 3181 InstCall *Call = makeHelperCall(H_call_read_tp, Dest, 0); |
2725 lowerCall(Call); | 3182 lowerCall(Call); |
2726 } | 3183 } |
2727 return; | 3184 return; |
2728 } | 3185 } |
2729 case Intrinsics::Setjmp: { | 3186 case Intrinsics::Setjmp: { |
2730 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); | 3187 InstCall *Call = makeHelperCall(H_call_setjmp, Dest, 1); |
2731 Call->addArg(Instr->getArg(0)); | 3188 Call->addArg(Instr->getArg(0)); |
2732 lowerCall(Call); | 3189 lowerCall(Call); |
2733 return; | 3190 return; |
2734 } | 3191 } |
2735 case Intrinsics::Sqrt: { | 3192 case Intrinsics::Sqrt: { |
2736 Variable *Src = legalizeToReg(Instr->getArg(0)); | 3193 Variable *Src = legalizeToReg(Instr->getArg(0)); |
2737 Variable *Dest = Instr->getDest(); | |
2738 Variable *T = makeReg(Dest->getType()); | 3194 Variable *T = makeReg(Dest->getType()); |
2739 _vsqrt(T, Src); | 3195 _vsqrt(T, Src); |
2740 _mov(Dest, T); | 3196 _mov(Dest, T); |
2741 return; | 3197 return; |
2742 } | 3198 } |
2743 case Intrinsics::Stacksave: { | 3199 case Intrinsics::Stacksave: { |
2744 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 3200 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
2745 Variable *Dest = Instr->getDest(); | |
2746 _mov(Dest, SP); | 3201 _mov(Dest, SP); |
2747 return; | 3202 return; |
2748 } | 3203 } |
2749 case Intrinsics::Stackrestore: { | 3204 case Intrinsics::Stackrestore: { |
2750 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 3205 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
2751 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex); | 3206 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex); |
2752 _mov_redefined(SP, Val); | 3207 _mov_redefined(SP, Val); |
2753 return; | 3208 return; |
2754 } | 3209 } |
2755 case Intrinsics::Trap: | 3210 case Intrinsics::Trap: |
(...skipping 461 matching lines...)
3217 } | 3672 } |
3218 // If we didn't do address mode optimization, then we only have a base/offset | 3673 // If we didn't do address mode optimization, then we only have a base/offset |
3219 // to work with. ARM always requires a base register, so just use that to | 3674 // to work with. ARM always requires a base register, so just use that to |
3220 // hold the operand. | 3675 // hold the operand. |
3221 Variable *Base = legalizeToReg(Operand); | 3676 Variable *Base = legalizeToReg(Operand); |
3222 return OperandARM32Mem::create( | 3677 return OperandARM32Mem::create( |
3223 Func, Ty, Base, | 3678 Func, Ty, Base, |
3224 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); | 3679 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); |
3225 } | 3680 } |
3226 | 3681 |
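 | // Creates an infinite-weight i64 temporary that the register allocator must |
 | // place in a consecutive GPR pair (as ldrexd/strexd require). Its Lo/Hi |
 | // halves are not allocated directly; they take their registers from the |
 | // pair in copyRegAllocFromInfWeightVariable64On32. |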
| 3682 Variable64On32 *TargetARM32::makeI64RegPair() { |
| 3683 Variable64On32 *Reg = |
| 3684 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| 3685 Reg->setMustHaveReg(); |
| 3686 Reg->initHiLo(Func); |
| 3687 Reg->getLo()->setMustNotHaveReg(); |
| 3688 Reg->getHi()->setMustNotHaveReg(); |
| 3689 return Reg; |
| 3690 } |
| 3691 |
3227 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { | 3692 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { |
3228 // There aren't any 64-bit integer registers for ARM32. | 3693 // There aren't any 64-bit integer registers for ARM32. |
3229 assert(Type != IceType_i64); | 3694 assert(Type != IceType_i64); |
3230 Variable *Reg = Func->makeVariable(Type); | 3695 Variable *Reg = Func->makeVariable(Type); |
3231 if (RegNum == Variable::NoRegister) | 3696 if (RegNum == Variable::NoRegister) |
3232 Reg->setMustHaveReg(); | 3697 Reg->setMustHaveReg(); |
3233 else | 3698 else |
3234 Reg->setRegNum(RegNum); | 3699 Reg->setRegNum(RegNum); |
3235 return Reg; | 3700 return Reg; |
3236 } | 3701 } |
(...skipping 242 matching lines...)
3479 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 3944 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
3480 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 3945 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
3481 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 3946 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
3482 } | 3947 } |
3483 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 3948 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
3484 // However, for compatibility with current NaCl LLVM, don't claim that. | 3949 // However, for compatibility with current NaCl LLVM, don't claim that. |
3485 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 3950 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
3486 } | 3951 } |
3487 | 3952 |
3488 } // end of namespace Ice | 3953 } // end of namespace Ice |