OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 177 matching lines...) | |
188 I64PairRegisters[RegARM32::val] = isI64Pair; \ | 188 I64PairRegisters[RegARM32::val] = isI64Pair; \ |
189 Float32Registers[RegARM32::val] = isFP32; \ | 189 Float32Registers[RegARM32::val] = isFP32; \ |
190 Float64Registers[RegARM32::val] = isFP64; \ | 190 Float64Registers[RegARM32::val] = isFP64; \ |
191 VectorRegisters[RegARM32::val] = isVec128; \ | 191 VectorRegisters[RegARM32::val] = isVec128; \ |
192 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ | 192 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ |
193 for (SizeT RegAlias : alias_init) { \ | 193 for (SizeT RegAlias : alias_init) { \ |
194 assert(!RegisterAliases[RegARM32::val][RegAlias] && \ | 194 assert(!RegisterAliases[RegARM32::val][RegAlias] && \ |
195 "Duplicate alias for " #val); \ | 195 "Duplicate alias for " #val); \ |
196 RegisterAliases[RegARM32::val].set(RegAlias); \ | 196 RegisterAliases[RegARM32::val].set(RegAlias); \ |
197 } \ | 197 } \ |
198 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ | |
199 assert(RegisterAliases[RegARM32::val][RegARM32::val]); \ | 198 assert(RegisterAliases[RegARM32::val][RegARM32::val]); \ |
200 ScratchRegs[RegARM32::val] = scratch; | 199 ScratchRegs[RegARM32::val] = scratch; |
201 REGARM32_TABLE; | 200 REGARM32_TABLE; |
202 #undef X | 201 #undef X |
203 TypeToRegisterSet[IceType_void] = InvalidRegisters; | 202 TypeToRegisterSet[IceType_void] = InvalidRegisters; |
204 TypeToRegisterSet[IceType_i1] = IntegerRegisters; | 203 TypeToRegisterSet[IceType_i1] = IntegerRegisters; |
205 TypeToRegisterSet[IceType_i8] = IntegerRegisters; | 204 TypeToRegisterSet[IceType_i8] = IntegerRegisters; |
206 TypeToRegisterSet[IceType_i16] = IntegerRegisters; | 205 TypeToRegisterSet[IceType_i16] = IntegerRegisters; |
207 TypeToRegisterSet[IceType_i32] = IntegerRegisters; | 206 TypeToRegisterSet[IceType_i32] = IntegerRegisters; |
208 TypeToRegisterSet[IceType_i64] = I64PairRegisters; | 207 TypeToRegisterSet[IceType_i64] = I64PairRegisters; |
209 TypeToRegisterSet[IceType_f32] = Float32Registers; | 208 TypeToRegisterSet[IceType_f32] = Float32Registers; |
210 TypeToRegisterSet[IceType_f64] = Float64Registers; | 209 TypeToRegisterSet[IceType_f64] = Float64Registers; |
211 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; | 210 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; |
212 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; | 211 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; |
213 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; | 212 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; |
214 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; | 213 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; |
215 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; | 214 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; |
216 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; | 215 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; |
217 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; | 216 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; |
218 } | 217 } |
219 | 218 |
219 namespace { | |
220 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { | |
221 for (Variable *Var : Vars) { | |
222 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); | |
223 if (!Var64) { | |
224 // This is not the variable we are looking for. | |
225 continue; | |
226 } | |
227 assert(Var64->hasReg() || !Var64->mustHaveReg()); | |
228 if (!Var64->hasReg()) { | |
229 continue; | |
230 } | |
231 SizeT FirstReg = RegARM32::getI64PairFirstGPRNum(Var->getRegNum()); | |
232 // This assumes little endian. | |
233 Variable *Lo = Var64->getLo(); | |
234 Variable *Hi = Var64->getHi(); | |
235 assert(Lo->hasReg() == Hi->hasReg()); | |
236 if (Lo->hasReg()) { | |
237 continue; | |
238 } | |
239 Lo->setRegNum(FirstReg); | |
240 Lo->setMustHaveReg(); | |
241 Hi->setRegNum(FirstReg + 1); | |
242 Hi->setMustHaveReg(); | |
243 } | |
244 } | |
245 } // end of anonymous namespace | |
246 | |
220 void TargetARM32::translateO2() { | 247 void TargetARM32::translateO2() { |
221 TimerMarker T(TimerStack::TT_O2, Func); | 248 TimerMarker T(TimerStack::TT_O2, Func); |
222 | 249 |
223 // TODO(stichnot): share passes with X86? | 250 // TODO(stichnot): share passes with X86? |
224 // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 251 // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
225 | 252 |
226 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 253 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
227 // Lower Phi instructions. | 254 // Lower Phi instructions. |
228 Func->placePhiLoads(); | 255 Func->placePhiLoads(); |
229 if (Func->hasError()) | 256 if (Func->hasError()) |
(...skipping 47 matching lines...) | |
277 // Validate the live range computations. The expensive validation call is | 304 // Validate the live range computations. The expensive validation call is |
278 // deliberately only made when assertions are enabled. | 305 // deliberately only made when assertions are enabled. |
279 assert(Func->validateLiveness()); | 306 assert(Func->validateLiveness()); |
280 // The post-codegen dump is done here, after liveness analysis and associated | 307 // The post-codegen dump is done here, after liveness analysis and associated |
281 // cleanup, to make the dump cleaner and more useful. | 308 // cleanup, to make the dump cleaner and more useful. |
282 Func->dump("After initial ARM32 codegen"); | 309 Func->dump("After initial ARM32 codegen"); |
283 Func->getVMetadata()->init(VMK_All); | 310 Func->getVMetadata()->init(VMK_All); |
284 regAlloc(RAK_Global); | 311 regAlloc(RAK_Global); |
285 if (Func->hasError()) | 312 if (Func->hasError()) |
286 return; | 313 return; |
314 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); | |
287 Func->dump("After linear scan regalloc"); | 315 Func->dump("After linear scan regalloc"); |
288 | 316 |
289 if (Ctx->getFlags().getPhiEdgeSplit()) { | 317 if (Ctx->getFlags().getPhiEdgeSplit()) { |
290 Func->advancedPhiLowering(); | 318 Func->advancedPhiLowering(); |
291 Func->dump("After advanced Phi lowering"); | 319 Func->dump("After advanced Phi lowering"); |
292 } | 320 } |
293 | 321 |
294 // Stack frame mapping. | 322 // Stack frame mapping. |
295 Func->genFrame(); | 323 Func->genFrame(); |
296 if (Func->hasError()) | 324 if (Func->hasError()) |
(...skipping 40 matching lines...) | |
337 Func->doArgLowering(); | 365 Func->doArgLowering(); |
338 | 366 |
339 Func->genCode(); | 367 Func->genCode(); |
340 if (Func->hasError()) | 368 if (Func->hasError()) |
341 return; | 369 return; |
342 Func->dump("After initial ARM32 codegen"); | 370 Func->dump("After initial ARM32 codegen"); |
343 | 371 |
344 regAlloc(RAK_InfOnly); | 372 regAlloc(RAK_InfOnly); |
345 if (Func->hasError()) | 373 if (Func->hasError()) |
346 return; | 374 return; |
375 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); | |
347 Func->dump("After regalloc of infinite-weight variables"); | 376 Func->dump("After regalloc of infinite-weight variables"); |
348 | 377 |
349 Func->genFrame(); | 378 Func->genFrame(); |
350 if (Func->hasError()) | 379 if (Func->hasError()) |
351 return; | 380 return; |
352 Func->dump("After stack frame mapping"); | 381 Func->dump("After stack frame mapping"); |
353 | 382 |
354 legalizeStackSlots(); | 383 legalizeStackSlots(); |
355 if (Func->hasError()) | 384 if (Func->hasError()) |
356 return; | 385 return; |
(...skipping 252 matching lines...) | |
609 // value from the stack slot. | 638 // value from the stack slot. |
610 if (Arg->hasReg()) { | 639 if (Arg->hasReg()) { |
611 assert(Ty != IceType_i64); | 640 assert(Ty != IceType_i64); |
612 // This should be simple, just load the parameter off the stack using a nice | 641 // This should be simple, just load the parameter off the stack using a nice |
613 // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for | 642 // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for |
614 // fp types, cannot have an index register), so we legalize the memory | 643 // fp types, cannot have an index register), so we legalize the memory |
615 // operand instead. | 644 // operand instead. |
616 auto *Mem = OperandARM32Mem::create( | 645 auto *Mem = OperandARM32Mem::create( |
617 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( | 646 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( |
618 Ctx->getConstantInt32(Arg->getStackOffset()))); | 647 Ctx->getConstantInt32(Arg->getStackOffset()))); |
619 legalizeToReg(Mem, Arg->getRegNum()); | 648 _mov(Arg, legalizeToReg(Mem, Arg->getRegNum())); |
620 // This argument-copying instruction uses an explicit OperandARM32Mem | 649 // This argument-copying instruction uses an explicit OperandARM32Mem |
621 // operand instead of a Variable, so its fill-from-stack operation has to | 650 // operand instead of a Variable, so its fill-from-stack operation has to |
622 // be tracked separately for statistics. | 651 // be tracked separately for statistics. |
623 Ctx->statsUpdateFills(); | 652 Ctx->statsUpdateFills(); |
624 } | 653 } |
625 } | 654 } |
626 | 655 |
627 Type TargetARM32::stackSlotType() { return IceType_i32; } | 656 Type TargetARM32::stackSlotType() { return IceType_i32; } |
628 | 657 |
629 void TargetARM32::addProlog(CfgNode *Node) { | 658 void TargetARM32::addProlog(CfgNode *Node) { |
(...skipping 79 matching lines...) | |
709 if (UsesFramePointer) { | 738 if (UsesFramePointer) { |
710 CalleeSaves[RegARM32::Reg_fp] = true; | 739 CalleeSaves[RegARM32::Reg_fp] = true; |
711 assert(RegsUsed[RegARM32::Reg_fp] == false); | 740 assert(RegsUsed[RegARM32::Reg_fp] == false); |
712 RegsUsed[RegARM32::Reg_fp] = true; | 741 RegsUsed[RegARM32::Reg_fp] = true; |
713 } | 742 } |
714 if (!MaybeLeafFunc) { | 743 if (!MaybeLeafFunc) { |
715 CalleeSaves[RegARM32::Reg_lr] = true; | 744 CalleeSaves[RegARM32::Reg_lr] = true; |
716 RegsUsed[RegARM32::Reg_lr] = true; | 745 RegsUsed[RegARM32::Reg_lr] = true; |
717 } | 746 } |
718 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 747 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
748 if (RegARM32::isI64RegisterPair(i)) { | |
749 // We don't save register pairs explicitly. Instead, we rely on the code | |
750 // fake-defing/fake-using each register in the pair. | |
751 continue; | |
752 } | |
719 if (CalleeSaves[i] && RegsUsed[i]) { | 753 if (CalleeSaves[i] && RegsUsed[i]) { |
720 // TODO(jvoung): do separate vpush for each floating point register | 754 // TODO(jvoung): do separate vpush for each floating point register |
721 // segment and += 4, or 8 depending on type. | 755 // segment and += 4, or 8 depending on type. |
722 ++NumCallee; | 756 ++NumCallee; |
723 PreservedRegsSizeBytes += 4; | 757 PreservedRegsSizeBytes += 4; |
724 GPRsToPreserve.push_back(getPhysicalRegister(i)); | 758 GPRsToPreserve.push_back(getPhysicalRegister(i)); |
725 } | 759 } |
726 } | 760 } |
727 Ctx->statsUpdateRegistersSaved(NumCallee); | 761 Ctx->statsUpdateRegistersSaved(NumCallee); |
728 if (!GPRsToPreserve.empty()) | 762 if (!GPRsToPreserve.empty()) |
(...skipping 148 matching lines...) | |
877 // Consider FP and LR as callee-save / used as needed. | 911 // Consider FP and LR as callee-save / used as needed. |
878 if (UsesFramePointer) { | 912 if (UsesFramePointer) { |
879 CalleeSaves[RegARM32::Reg_fp] = true; | 913 CalleeSaves[RegARM32::Reg_fp] = true; |
880 } | 914 } |
881 if (!MaybeLeafFunc) { | 915 if (!MaybeLeafFunc) { |
882 CalleeSaves[RegARM32::Reg_lr] = true; | 916 CalleeSaves[RegARM32::Reg_lr] = true; |
883 } | 917 } |
884 // Pop registers in ascending order just like push (instead of in reverse | 918 // Pop registers in ascending order just like push (instead of in reverse |
885 // order). | 919 // order). |
886 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 920 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
921 if (RegARM32::isI64RegisterPair(i)) { | |
922 continue; | |
923 } | |
924 | |
887 if (CalleeSaves[i] && RegsUsed[i]) { | 925 if (CalleeSaves[i] && RegsUsed[i]) { |
888 GPRsToRestore.push_back(getPhysicalRegister(i)); | 926 GPRsToRestore.push_back(getPhysicalRegister(i)); |
889 } | 927 } |
890 } | 928 } |
891 if (!GPRsToRestore.empty()) | 929 if (!GPRsToRestore.empty()) |
892 _pop(GPRsToRestore); | 930 _pop(GPRsToRestore); |
893 | 931 |
894 if (!Ctx->getFlags().getUseSandboxing()) | 932 if (!Ctx->getFlags().getUseSandboxing()) |
895 return; | 933 return; |
896 | 934 |
(...skipping 835 matching lines...) | |
1732 Operand *Src0 = Inst->getSrc(0); | 1770 Operand *Src0 = Inst->getSrc(0); |
1733 assert(Dest->getType() == Src0->getType()); | 1771 assert(Dest->getType() == Src0->getType()); |
1734 if (Dest->getType() == IceType_i64) { | 1772 if (Dest->getType() == IceType_i64) { |
1735 Src0 = legalizeUndef(Src0); | 1773 Src0 = legalizeUndef(Src0); |
1736 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 1774 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
1737 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 1775 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
1738 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1776 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
1739 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1777 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1740 Variable *T_Lo = makeReg(IceType_i32); | 1778 Variable *T_Lo = makeReg(IceType_i32); |
1741 Variable *T_Hi = makeReg(IceType_i32); | 1779 Variable *T_Hi = makeReg(IceType_i32); |
1780 | |
1742 _mov(T_Lo, Src0Lo); | 1781 _mov(T_Lo, Src0Lo); |
1743 _mov(DestLo, T_Lo); | 1782 _mov(DestLo, T_Lo); |
1744 _mov(T_Hi, Src0Hi); | 1783 _mov(T_Hi, Src0Hi); |
1745 _mov(DestHi, T_Hi); | 1784 _mov(DestHi, T_Hi); |
1746 } else { | 1785 } else { |
1747 Operand *NewSrc; | 1786 Operand *NewSrc; |
1748 if (Dest->hasReg()) { | 1787 if (Dest->hasReg()) { |
1749 // If Dest already has a physical register, then legalize the Src operand | 1788 // If Dest already has a physical register, then legalize the Src operand |
1750 // into a Variable with the same register assignment. This especially | 1789 // into a Variable with the same register assignment. This especially |
1751 // helps allow the use of Flex operands. | 1790 // helps allow the use of Flex operands. |
(...skipping 512 matching lines...) | |
2264 case IceType_i64: { | 2303 case IceType_i64: { |
2265 // t0, t1 <- src0 | 2304 // t0, t1 <- src0 |
2266 // dest[31..0] = t0 | 2305 // dest[31..0] = t0 |
2267 // dest[63..32] = t1 | 2306 // dest[63..32] = t1 |
2268 assert(Src0->getType() == IceType_f64); | 2307 assert(Src0->getType() == IceType_f64); |
2269 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | 2308 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
2270 T->initHiLo(Func); | 2309 T->initHiLo(Func); |
2271 configureBitcastTemporary(T); | 2310 configureBitcastTemporary(T); |
2272 Variable *Src0R = legalizeToReg(Src0); | 2311 Variable *Src0R = legalizeToReg(Src0); |
2273 _mov(T, Src0R); | 2312 _mov(T, Src0R); |
2274 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | 2313 lowerAssign(InstAssign::create(Func, Dest, T)); |
2275 lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo())); | |
2276 lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi())); | |
2277 break; | 2314 break; |
2278 } | 2315 } |
2279 case IceType_f64: { | 2316 case IceType_f64: { |
2280 // T0 <- lo(src) | 2317 // T0 <- lo(src) |
2281 // T1 <- hi(src) | 2318 // T1 <- hi(src) |
2282 // vmov T2, T0, T1 | 2319 // vmov T2, T0, T1 |
2283 // Dest <- T2 | 2320 // Dest <- T2 |
2284 assert(Src0->getType() == IceType_i64); | 2321 assert(Src0->getType() == IceType_i64); |
2322 Variable *T = makeReg(DestType); | |
2285 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | 2323 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
2286 Src64->initHiLo(Func); | 2324 Src64->initHiLo(Func); |
2287 configureBitcastTemporary(Src64); | 2325 configureBitcastTemporary(Src64); |
2288 lowerAssign(InstAssign::create(Func, Src64, Src0)); | 2326 lowerAssign(InstAssign::create(Func, Src64, Src0)); |
2289 Variable *T = makeReg(IceType_f64); | |
2290 _mov(T, Src64); | 2327 _mov(T, Src64); |
2291 lowerAssign(InstAssign::create(Func, Dest, T)); | 2328 lowerAssign(InstAssign::create(Func, Dest, T)); |
2292 break; | 2329 break; |
2293 } | 2330 } |
2294 case IceType_v4i1: | 2331 case IceType_v4i1: |
2295 case IceType_v8i1: | 2332 case IceType_v8i1: |
2296 case IceType_v16i1: | 2333 case IceType_v16i1: |
2297 case IceType_v8i16: | 2334 case IceType_v8i16: |
2298 case IceType_v16i8: | 2335 case IceType_v16i8: |
2299 case IceType_v4f32: | 2336 case IceType_v4f32: |
(...skipping 230 matching lines...) | |
2530 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); | 2567 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); |
2531 _mov(Dest, T); | 2568 _mov(Dest, T); |
2532 return; | 2569 return; |
2533 } | 2570 } |
2534 | 2571 |
2535 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { | 2572 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { |
2536 (void)Inst; | 2573 (void)Inst; |
2537 UnimplementedError(Func->getContext()->getFlags()); | 2574 UnimplementedError(Func->getContext()->getFlags()); |
2538 } | 2575 } |
2539 | 2576 |
2577 namespace { | |
2578 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | |
2579 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | |
2580 return Integer->getValue(); | |
2581 return Intrinsics::MemoryOrderInvalid; | |
2582 } | |
2583 } // end of anonymous namespace | |
2584 | |
2585 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | |
2586 Operand *Ptr, Operand *Val) { | |
2587 // retry: | |
2588 // ldrex contents, [addr] | |
2589 // op tmp, contents, operand | |
2590 // strex success, tmp, [addr] | |
2591 // jne retry | |
2592 // fake-use(addr, operand) @ prevents undesireable clobbering. | |
Jim Stichnoth 2015/10/05 22:07:06: undesirable
John 2015/10/06 12:03:38: Done.
| |
2593 // mov dest, contents | |
2594 assert(Dest != nullptr); | |
2595 Type DestTy = Dest->getType(); | |
2596 (void)Ptr; | |
2597 (void)Val; | |
2598 | |
2599 OperandARM32Mem *Mem; | |
2600 Variable *PtrContentsReg; | |
2601 Variable *PtrContentsHiReg; | |
2602 Variable *PtrContentsLoReg; | |
2603 Variable *Value = Func->makeVariable(DestTy); | |
2604 Variable *ValueReg; | |
2605 Variable *ValueHiReg; | |
2606 Variable *ValueLoReg; | |
2607 Variable *Success = makeReg(IceType_i32); | |
2608 Variable *TmpReg; | |
2609 Variable *TmpHiReg; | |
2610 Variable *TmpLoReg; | |
2611 Operand *_0 = Ctx->getConstantZero(IceType_i32); | |
2612 InstARM32Label *Retry = InstARM32Label::create(Func, this); | |
2613 | |
2614 if (DestTy != IceType_i64) { | |
Jim Stichnoth 2015/10/05 22:07:06: (here and below) I think instead of this: if (A
John 2015/10/06 12:03:38: Done here, and elsewhere.
| |
2615 PtrContentsReg = makeReg(DestTy); | |
2616 PtrContentsHiReg = nullptr; | |
2617 PtrContentsLoReg = PtrContentsReg; | |
2618 | |
2619 ValueReg = makeReg(DestTy); | |
2620 ValueHiReg = nullptr; | |
2621 ValueLoReg = ValueReg; | |
2622 | |
2623 TmpReg = makeReg(DestTy); | |
2624 TmpHiReg = nullptr; | |
2625 TmpLoReg = TmpReg; | |
2626 } else { | |
2627 Variable64On32 *PtrContentsReg64 = makeI64RegPair(); | |
2628 PtrContentsHiReg = PtrContentsReg64->getHi(); | |
2629 PtrContentsLoReg = PtrContentsReg64->getLo(); | |
2630 PtrContentsReg = PtrContentsReg64; | |
2631 | |
2632 llvm::cast<Variable64On32>(Value)->initHiLo(Func); | |
2633 Variable64On32 *ValueReg64 = makeI64RegPair(); | |
2634 ValueHiReg = ValueReg64->getHi(); | |
2635 ValueLoReg = ValueReg64->getLo(); | |
2636 ValueReg = ValueReg64; | |
2637 | |
2638 Variable64On32 *TmpReg64 = makeI64RegPair(); | |
2639 TmpHiReg = TmpReg64->getHi(); | |
2640 TmpLoReg = TmpReg64->getLo(); | |
2641 TmpReg = TmpReg64; | |
2642 } | |
2643 | |
2644 if (DestTy == IceType_i64) { | |
2645 Context.insert(InstFakeDef::create(Func, Value)); | |
2646 } | |
2647 lowerAssign(InstAssign::create(Func, Value, Val)); | |
2648 | |
2649 Variable *PtrVar = Func->makeVariable(IceType_i32); | |
2650 lowerAssign(InstAssign::create(Func, PtrVar, Ptr)); | |
2651 | |
2652 _dmb(); | |
2653 Context.insert(Retry); | |
2654 Mem = formMemoryOperand(PtrVar, DestTy); | |
2655 if (DestTy == IceType_i64) { | |
2656 Context.insert(InstFakeDef::create(Func, ValueReg, Value)); | |
2657 } | |
2658 lowerAssign(InstAssign::create(Func, ValueReg, Value)); | |
2659 if (DestTy == IceType_i8 || DestTy == IceType_i16) { | |
2660 _uxt(ValueReg, ValueReg); | |
2661 } | |
2662 _ldrex(PtrContentsReg, Mem); | |
2663 | |
2664 if (DestTy == IceType_i64) { | |
2665 Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg)); | |
2666 } | |
2667 switch (Operation) { | |
2668 default: | |
2669 Func->setError("Unknown AtomicRMW operation"); | |
2670 return; | |
2671 case Intrinsics::AtomicAdd: | |
2672 if (DestTy != IceType_i64) { | |
2673 _add(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
2674 } else { | |
2675 _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
2676 _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
2677 } | |
2678 break; | |
2679 case Intrinsics::AtomicSub: | |
2680 if (DestTy != IceType_i64) { | |
2681 _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
2682 } else { | |
2683 _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
2684 _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
2685 } | |
2686 break; | |
2687 case Intrinsics::AtomicOr: | |
2688 _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
2689 if (DestTy == IceType_i64) { | |
2690 _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
2691 } | |
2692 break; | |
2693 case Intrinsics::AtomicAnd: | |
2694 _and(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
2695 if (DestTy == IceType_i64) { | |
2696 _and(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
2697 } | |
2698 break; | |
2699 case Intrinsics::AtomicXor: | |
2700 _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
2701 if (DestTy == IceType_i64) { | |
2702 _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
2703 } | |
2704 break; | |
2705 case Intrinsics::AtomicExchange: | |
2706 _mov(TmpLoReg, ValueLoReg); | |
2707 if (DestTy == IceType_i64) { | |
2708 _mov(TmpHiReg, ValueHiReg); | |
2709 } | |
2710 break; | |
2711 } | |
2712 _strex(Success, TmpReg, Mem); | |
2713 _cmp(Success, _0); | |
2714 _br(Retry, CondARM32::NE); | |
2715 | |
2716 // The following fake-uses ensure that Subzero will not clobber them in the | |
2717 // load-linked/store-conditional loop above. We might have to spill them, but | |
2718 // spilling is preferable over incorrect behavior. | |
2719 Context.insert(InstFakeUse::create(Func, PtrVar)); | |
2720 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) { | |
2721 Context.insert(InstFakeUse::create(Func, Value64->getHi())); | |
2722 Context.insert(InstFakeUse::create(Func, Value64->getLo())); | |
2723 } else { | |
2724 Context.insert(InstFakeUse::create(Func, Value)); | |
2725 } | |
2726 _dmb(); | |
2727 if (DestTy == IceType_i8 || DestTy == IceType_i16) { | |
2728 _uxt(PtrContentsReg, PtrContentsReg); | |
2729 } | |
2730 | |
2731 if (DestTy == IceType_i64) { | |
2732 Context.insert(InstFakeUse::create(Func, PtrContentsReg)); | |
2733 } | |
2734 lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg)); | |
2735 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) { | |
2736 Context.insert(InstFakeUse::create(Func, Dest64->getLo())); | |
2737 Context.insert(InstFakeUse::create(Func, Dest64->getHi())); | |
2738 } else { | |
2739 Context.insert(InstFakeUse::create(Func, Dest)); | |
2740 } | |
2741 } | |
2742 | |
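The retry loop above is the usual load-linked/store-conditional pattern. As a hedged illustration only (not part of the patch; the function and parameter names below are invented, and portable std::atomic stands in for ldrex/strex), the semantics the generated AtomicAdd sequence implements look roughly like this:

```cpp
#include <atomic>
#include <cstdint>

// Illustrative analogue of the ldrex/op/strex retry loop emitted for AtomicAdd.
uint32_t atomicAddAnalogue(std::atomic<uint32_t> *Addr, uint32_t Operand) {
  uint32_t Contents = Addr->load(std::memory_order_relaxed); // ldrex contents, [addr]
  // compare_exchange_weak plays the role of strex: retry while the
  // store-conditional fails, reloading Contents each time around.
  while (!Addr->compare_exchange_weak(Contents, Contents + Operand)) {
  }
  return Contents; // mov dest, contents (the value observed before the update)
}
```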
2540 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 2743 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
2541 switch (Instr->getIntrinsicInfo().ID) { | 2744 Variable *Dest = Instr->getDest(); |
2745 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void; | |
2746 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID; | |
2747 switch (ID) { | |
2748 case Intrinsics::AtomicFence: | |
2749 case Intrinsics::AtomicFenceAll: | |
2750 assert(Dest == nullptr); | |
2751 _dmb(); | |
2752 return; | |
2753 case Intrinsics::AtomicIsLockFree: { | |
2754 Operand *ByteSize = Instr->getArg(0); | |
2755 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize); | |
2756 if (CI == nullptr) { | |
2757 // The PNaCl ABI requires the byte size to be a compile-time constant. | |
2758 Func->setError("AtomicIsLockFree byte size should be compile-time const"); | |
2759 return; | |
2760 } | |
2761 static constexpr int32_t NotLockFree = 0; | |
2762 static constexpr int32_t LockFree = 1; | |
2763 int32_t Result = NotLockFree; | |
2764 switch (CI->getValue()) { | |
2765 case 1: | |
2766 case 2: | |
2767 case 4: | |
2768 case 8: | |
2769 Result = LockFree; | |
2770 break; | |
2771 } | |
2772 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result))); | |
2773 return; | |
2774 } | |
2775 case Intrinsics::AtomicLoad: { | |
2776 assert(isScalarIntegerType(DestTy)); | |
2777 // We require the memory address to be naturally aligned. Given that is the | |
2778 // case, then normal loads are atomic. | |
2779 if (!Intrinsics::isMemoryOrderValid( | |
2780 ID, getConstantMemoryOrder(Instr->getArg(1)))) { | |
2781 Func->setError("Unexpected memory ordering for AtomicLoad"); | |
2782 return; | |
2783 } | |
2784 Variable *T; | |
2785 | |
2786 if (DestTy == IceType_i64) { | |
2787 // ldrex is the only arm instruction that is guaranteed to load a 64-bit | |
2788 // integer atomically. Everything else works with a regular ldr. | |
2789 T = makeI64RegPair(); | |
2790 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); | |
2791 } else { | |
2792 T = makeReg(DestTy); | |
2793 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); | |
2794 } | |
2795 _dmb(); | |
2796 lowerAssign(InstAssign::create(Func, Dest, T)); | |
2797 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | |
2798 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert | |
2799 // the FakeUse on the last-inserted instruction's dest. | |
2800 Context.insert( | |
2801 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | |
2802 return; | |
2803 } | |
2804 case Intrinsics::AtomicStore: { | |
2805 // We require the memory address to be naturally aligned. Given that is the | |
2806 // case, then normal loads are atomic. | |
2807 if (!Intrinsics::isMemoryOrderValid( | |
2808 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | |
2809 Func->setError("Unexpected memory ordering for AtomicStore"); | |
2810 return; | |
2811 } | |
2812 Operand *Value = Instr->getArg(0); | |
2813 Type ValueTy = Value->getType(); | |
2814 assert(isScalarIntegerType(ValueTy)); | |
2815 Operand *Addr = Instr->getArg(1); | |
2816 | |
2817 _dmb(); | |
2818 if (ValueTy != IceType_i64) { | |
2819 // non-64-bit stores are atomic as long as the address is aligned. |
2820 // This is PNaCl, so addresses are aligned. | |
2821 Variable *T = makeReg(ValueTy); | |
2822 lowerAssign(InstAssign::create(Func, T, Value)); | |
2823 _str(T, formMemoryOperand(Addr, ValueTy)); | |
2824 } else { | |
2825 // Atomic 64-bit stores require a load-linked/store-conditional loop using |
2826 // ldrexd and strexd. The lowered code is: |
2827 // | |
2828 // retry: | |
2829 // ldrexd t.lo, t.hi, [addr] | |
2830 // strexd success, value.lo, value.hi, [addr] | |
2831 // cmp success, #0 | |
2832 // bne retry | |
2833 // fake-use(addr, value.lo, value.hi) | |
2834 // | |
2835 // The fake-use is needed to prevent those variables from being clobbered | |
2836 // in the loop (which will happen under register pressure.) | |
2837 Variable64On32 *Tmp = makeI64RegPair(); | |
2838 Variable64On32 *ValueVar = | |
2839 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
2840 Variable *AddrVar = makeReg(IceType_i32); | |
2841 Variable *Success = makeReg(IceType_i32); | |
2842 OperandARM32Mem *Mem; | |
2843 Operand *_0 = Ctx->getConstantZero(IceType_i32); | |
2844 InstARM32Label *Retry = InstARM32Label::create(Func, this); | |
2845 Variable64On32 *NewReg = makeI64RegPair(); | |
2846 ValueVar->initHiLo(Func); | |
2847 ValueVar->mustNotHaveReg(); | |
2848 | |
2849 lowerAssign(InstAssign::create(Func, ValueVar, Value)); | |
2850 lowerAssign(InstAssign::create(Func, AddrVar, Addr)); | |
2851 | |
2852 Context.insert(Retry); | |
2853 Context.insert(InstFakeDef::create(Func, NewReg)); | |
2854 lowerAssign(InstAssign::create(Func, NewReg, ValueVar)); | |
2855 Mem = formMemoryOperand(AddrVar, IceType_i64); | |
2856 _ldrex(Tmp, Mem); | |
2857 // This fake-use prevents the ldrex from being dead-code eliminated, |
2858 // while also keeping liveness happy about all defs being used. | |
2859 Context.insert( | |
2860 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | |
2861 _strex(Success, NewReg, Mem); | |
2862 _cmp(Success, _0); | |
2863 _br(Retry, CondARM32::NE); | |
2864 | |
2865 Context.insert(InstFakeUse::create(Func, ValueVar->getLo())); | |
2866 Context.insert(InstFakeUse::create(Func, ValueVar->getHi())); | |
2867 Context.insert(InstFakeUse::create(Func, AddrVar)); | |
2868 } | |
2869 _dmb(); | |
2870 return; | |
2871 } | |
2542 case Intrinsics::AtomicCmpxchg: { | 2872 case Intrinsics::AtomicCmpxchg: { |
2543 UnimplementedError(Func->getContext()->getFlags()); | 2873 // The initial lowering for cmpxchg was: |
2544 return; | 2874 // |
2545 } | 2875 // retry: |
2546 case Intrinsics::AtomicFence: | 2876 // ldrex tmp, [addr] |
2547 UnimplementedError(Func->getContext()->getFlags()); | 2877 // cmp tmp, expected |
2548 return; | 2878 // mov expected, tmp |
2549 case Intrinsics::AtomicFenceAll: | 2879 // jne retry |
2550 // NOTE: FenceAll should prevent and load/store from being moved across the | 2880 // strex success, new, [addr] |
2551 // fence (both atomic and non-atomic). The InstARM32Mfence instruction is | 2881 // cmp success, #0 |
2552 // currently marked coarsely as "HasSideEffects". | 2882 // bne retry |
2553 UnimplementedError(Func->getContext()->getFlags()); | 2883 // mov dest, expected |
2554 return; | 2884 // |
2555 case Intrinsics::AtomicIsLockFree: { | 2885 // Besides requiring two branches, that lowering could also potentially |
2556 UnimplementedError(Func->getContext()->getFlags()); | 2886 // write to memory (in mov expected, tmp) unless we were OK with increasing |
2557 return; | 2887 // the register pressure and requiring expected to be an infinite-weight |
2558 } | 2888 // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through |
2559 case Intrinsics::AtomicLoad: { | 2889 // careful rewriting, and thanks to predication, we now implement the |
2560 UnimplementedError(Func->getContext()->getFlags()); | 2890 // lowering as: |
2561 return; | 2891 // |
2562 } | 2892 // retry: |
2563 case Intrinsics::AtomicRMW: | 2893 // ldrex tmp, [addr] |
2564 UnimplementedError(Func->getContext()->getFlags()); | 2894 // cmp tmp, expected |
2565 return; | 2895 // strexeq success, new, [addr] |
2566 case Intrinsics::AtomicStore: { | 2896 // movne expected, tmp |
2567 UnimplementedError(Func->getContext()->getFlags()); | 2897 // cmpeq success, #0 |
2898 // bne retry | |
2899 // mov dest, expected | |
2900 // | |
2901 // Predication lets us move the strex ahead of the mov expected, tmp, which | |
2902 // allows tmp to be a non-infinite weight temporary. We wanted to avoid | |
2903 // writing to memory between ldrex and strex because, even though most times | |
2904 // that would cause no issues, if any interleaving memory write aliased | |
2905 // [addr] then we would have undefined behavior. Undefined behavior isn't |
2906 // cool, so we try to avoid it. See the "Synchronization and semaphores" | |
2907 // section of the "ARM Architecture Reference Manual." | |
2908 | |
2909 assert(isScalarIntegerType(DestTy)); | |
2910 // We require the memory address to be naturally aligned. Given that is the | |
2911 // case, then normal loads are atomic. | |
2912 if (!Intrinsics::isMemoryOrderValid( | |
2913 ID, getConstantMemoryOrder(Instr->getArg(3)), | |
2914 getConstantMemoryOrder(Instr->getArg(4)))) { | |
2915 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | |
2916 return; | |
2917 } | |
2918 | |
2919 OperandARM32Mem *Mem; | |
2920 Variable *TmpReg; | |
2921 Variable *Expected, *ExpectedReg; | |
2922 Variable *New, *NewReg; | |
2923 Variable *Success = makeReg(IceType_i32); | |
2924 Operand *_0 = Ctx->getConstantZero(IceType_i32); | |
2925 InstARM32Label *Retry = InstARM32Label::create(Func, this); | |
2926 | |
2927 if (DestTy == IceType_i64) { | |
2928 Variable64On32 *TmpReg64 = makeI64RegPair(); | |
2929 Variable64On32 *New64 = | |
2930 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
2931 Variable64On32 *NewReg64 = makeI64RegPair(); | |
2932 Variable64On32 *Expected64 = | |
2933 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
2934 Variable64On32 *ExpectedReg64 = makeI64RegPair(); | |
2935 | |
2936 New64->initHiLo(Func); | |
2937 New64->mustNotHaveReg(); | |
2938 Expected64->initHiLo(Func); | |
2939 Expected64->mustNotHaveReg(); | |
2940 | |
2941 TmpReg = TmpReg64; | |
2942 New = New64; | |
2943 NewReg = NewReg64; | |
2944 Expected = Expected64; | |
2945 ExpectedReg = ExpectedReg64; | |
2946 } else { | |
2947 TmpReg = makeReg(DestTy); | |
2948 New = Func->makeVariable(DestTy); | |
2949 NewReg = makeReg(DestTy); | |
2950 Expected = Func->makeVariable(DestTy); | |
2951 ExpectedReg = makeReg(DestTy); | |
2952 } | |
2953 | |
2954 Mem = formMemoryOperand(Instr->getArg(0), DestTy); | |
2955 if (DestTy == IceType_i64) { | |
2956 Context.insert(InstFakeDef::create(Func, Expected)); | |
2957 } | |
2958 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); | |
2959 if (DestTy == IceType_i64) { | |
2960 Context.insert(InstFakeDef::create(Func, New)); | |
2961 } | |
2962 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); | |
2963 _dmb(); | |
2964 | |
2965 Context.insert(Retry); | |
2966 if (DestTy == IceType_i64) { | |
2967 Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected)); | |
2968 } | |
2969 lowerAssign(InstAssign::create(Func, ExpectedReg, Expected)); | |
2970 if (DestTy == IceType_i64) { | |
2971 Context.insert(InstFakeDef::create(Func, NewReg, New)); | |
2972 } | |
2973 lowerAssign(InstAssign::create(Func, NewReg, New)); | |
2974 | |
2975 _ldrex(TmpReg, Mem); | |
2976 Context.insert( | |
2977 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | |
2978 if (DestTy != IceType_i64) { | |
2979 _cmp(TmpReg, ExpectedReg); | |
2980 } else { | |
2981 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); | |
2982 auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg); | |
2983 // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's | |
2984 // keep liveness happy, shall we? | |
2985 Context.insert(InstFakeUse::create(Func, TmpReg)); | |
2986 Context.insert(InstFakeUse::create(Func, ExpectedReg)); | |
2987 _cmp(TmpReg64->getHi(), ExpectedReg64->getHi()); | |
2988 _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ); | |
2989 } | |
2990 _strex(Success, NewReg, Mem, CondARM32::EQ); | |
2991 if (DestTy != IceType_i64) { | |
2992 _mov_redefined(Expected, TmpReg, CondARM32::NE); | |
2993 } else { | |
2994 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); | |
2995 auto *Expected64 = llvm::cast<Variable64On32>(Expected); | |
2996 _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE); | |
2997 _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE); | |
2998 auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg); | |
2999 Context.insert(FakeDef); | |
3000 FakeDef->setDestRedefined(); | |
3001 } | |
3002 _cmp(Success, _0, CondARM32::EQ); | |
3003 _br(Retry, CondARM32::NE); | |
3004 _dmb(); | |
3005 lowerAssign(InstAssign::create(Func, Dest, Expected)); | |
3006 Context.insert(InstFakeUse::create(Func, Expected)); | |
3007 if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) { | |
3008 Context.insert(InstFakeUse::create(Func, New64->getLo())); | |
3009 Context.insert(InstFakeUse::create(Func, New64->getHi())); | |
3010 } else { | |
3011 Context.insert(InstFakeUse::create(Func, New)); | |
3012 } | |
3013 return; | |
3014 } | |
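For reference, the observable contract of the predicated cmpxchg sequence above can be sketched in portable C++ (illustrative only, not part of the patch; the names are invented). The key point mirrored from the lowering is that on a value mismatch the observed contents are written back into "expected" (the movne), and the returned value is always the original contents of [addr]:

```cpp
#include <atomic>
#include <cstdint>

// Illustrative analogue of the AtomicCmpxchg lowering's observable behavior.
uint32_t cmpxchgAnalogue(std::atomic<uint32_t> *Addr, uint32_t Expected,
                         uint32_t New) {
  // compare_exchange_strong stores New only if *Addr == Expected; on failure it
  // overwrites Expected with the observed value ("movne expected, tmp"). The
  // strong form also absorbs the strex-failure retry that the lowering spells
  // out explicitly with its bne back to the retry label.
  Addr->compare_exchange_strong(Expected, New);
  return Expected; // dest <- expected: the original contents in either case
}
```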
3015 case Intrinsics::AtomicRMW: { | |
3016 if (!Intrinsics::isMemoryOrderValid( | |
3017 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | |
3018 Func->setError("Unexpected memory ordering for AtomicRMW"); | |
3019 return; | |
3020 } | |
3021 lowerAtomicRMW( | |
3022 Dest, static_cast<uint32_t>( | |
3023 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), | |
3024 Instr->getArg(1), Instr->getArg(2)); | |
2568 return; | 3025 return; |
2569 } | 3026 } |
2570 case Intrinsics::Bswap: { | 3027 case Intrinsics::Bswap: { |
2571 Variable *Dest = Instr->getDest(); | |
2572 Operand *Val = Instr->getArg(0); | 3028 Operand *Val = Instr->getArg(0); |
2573 Type Ty = Val->getType(); | 3029 Type Ty = Val->getType(); |
2574 if (Ty == IceType_i64) { | 3030 if (Ty == IceType_i64) { |
2575 Val = legalizeUndef(Val); | 3031 Val = legalizeUndef(Val); |
2576 Variable *Val_Lo = legalizeToReg(loOperand(Val)); | 3032 Variable *Val_Lo = legalizeToReg(loOperand(Val)); |
2577 Variable *Val_Hi = legalizeToReg(hiOperand(Val)); | 3033 Variable *Val_Hi = legalizeToReg(hiOperand(Val)); |
2578 Variable *T_Lo = makeReg(IceType_i32); | 3034 Variable *T_Lo = makeReg(IceType_i32); |
2579 Variable *T_Hi = makeReg(IceType_i32); | 3035 Variable *T_Hi = makeReg(IceType_i32); |
2580 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3036 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2581 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3037 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2582 _rev(T_Lo, Val_Lo); | 3038 _rev(T_Lo, Val_Lo); |
2583 _rev(T_Hi, Val_Hi); | 3039 _rev(T_Hi, Val_Hi); |
2584 _mov(DestLo, T_Hi); | 3040 _mov(DestLo, T_Hi); |
2585 _mov(DestHi, T_Lo); | 3041 _mov(DestHi, T_Lo); |
2586 } else { | 3042 } else { |
2587 assert(Ty == IceType_i32 || Ty == IceType_i16); | 3043 assert(Ty == IceType_i32 || Ty == IceType_i16); |
2588 Variable *ValR = legalizeToReg(Val); | 3044 Variable *ValR = legalizeToReg(Val); |
2589 Variable *T = makeReg(Ty); | 3045 Variable *T = makeReg(Ty); |
2590 _rev(T, ValR); | 3046 _rev(T, ValR); |
2591 if (Val->getType() == IceType_i16) { | 3047 if (Val->getType() == IceType_i16) { |
2592 Operand *Sixteen = | 3048 Operand *Sixteen = |
2593 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex); | 3049 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex); |
2594 _lsr(T, T, Sixteen); | 3050 _lsr(T, T, Sixteen); |
2595 } | 3051 } |
2596 _mov(Dest, T); | 3052 _mov(Dest, T); |
2597 } | 3053 } |
2598 return; | 3054 return; |
2599 } | 3055 } |
2600 case Intrinsics::Ctpop: { | 3056 case Intrinsics::Ctpop: { |
2601 Variable *Dest = Instr->getDest(); | |
2602 Operand *Val = Instr->getArg(0); | 3057 Operand *Val = Instr->getArg(0); |
2603 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) | 3058 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) |
2604 ? H_call_ctpop_i32 | 3059 ? H_call_ctpop_i32 |
2605 : H_call_ctpop_i64, | 3060 : H_call_ctpop_i64, |
2606 Dest, 1); | 3061 Dest, 1); |
2607 Call->addArg(Val); | 3062 Call->addArg(Val); |
2608 lowerCall(Call); | 3063 lowerCall(Call); |
2609 // The popcount helpers always return 32-bit values, while the intrinsic's | 3064 // The popcount helpers always return 32-bit values, while the intrinsic's |
2610 // signature matches some 64-bit platform's native instructions and expect | 3065 // signature matches some 64-bit platform's native instructions and expect |
2611 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in | 3066 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in |
(...skipping 14 matching lines...) | |
2626 Operand *Val = Instr->getArg(0); | 3081 Operand *Val = Instr->getArg(0); |
2627 Variable *ValLoR; | 3082 Variable *ValLoR; |
2628 Variable *ValHiR = nullptr; | 3083 Variable *ValHiR = nullptr; |
2629 if (Val->getType() == IceType_i64) { | 3084 if (Val->getType() == IceType_i64) { |
2630 Val = legalizeUndef(Val); | 3085 Val = legalizeUndef(Val); |
2631 ValLoR = legalizeToReg(loOperand(Val)); | 3086 ValLoR = legalizeToReg(loOperand(Val)); |
2632 ValHiR = legalizeToReg(hiOperand(Val)); | 3087 ValHiR = legalizeToReg(hiOperand(Val)); |
2633 } else { | 3088 } else { |
2634 ValLoR = legalizeToReg(Val); | 3089 ValLoR = legalizeToReg(Val); |
2635 } | 3090 } |
2636 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); | 3091 lowerCLZ(Dest, ValLoR, ValHiR); |
2637 return; | 3092 return; |
2638 } | 3093 } |
2639 case Intrinsics::Cttz: { | 3094 case Intrinsics::Cttz: { |
2640 // Essentially like Clz, but reverse the bits first. | 3095 // Essentially like Clz, but reverse the bits first. |
2641 Operand *Val = Instr->getArg(0); | 3096 Operand *Val = Instr->getArg(0); |
2642 Variable *ValLoR; | 3097 Variable *ValLoR; |
2643 Variable *ValHiR = nullptr; | 3098 Variable *ValHiR = nullptr; |
2644 if (Val->getType() == IceType_i64) { | 3099 if (Val->getType() == IceType_i64) { |
2645 Val = legalizeUndef(Val); | 3100 Val = legalizeUndef(Val); |
2646 ValLoR = legalizeToReg(loOperand(Val)); | 3101 ValLoR = legalizeToReg(loOperand(Val)); |
2647 ValHiR = legalizeToReg(hiOperand(Val)); | 3102 ValHiR = legalizeToReg(hiOperand(Val)); |
2648 Variable *TLo = makeReg(IceType_i32); | 3103 Variable *TLo = makeReg(IceType_i32); |
2649 Variable *THi = makeReg(IceType_i32); | 3104 Variable *THi = makeReg(IceType_i32); |
2650 _rbit(TLo, ValLoR); | 3105 _rbit(TLo, ValLoR); |
2651 _rbit(THi, ValHiR); | 3106 _rbit(THi, ValHiR); |
2652 ValLoR = THi; | 3107 ValLoR = THi; |
2653 ValHiR = TLo; | 3108 ValHiR = TLo; |
2654 } else { | 3109 } else { |
2655 ValLoR = legalizeToReg(Val); | 3110 ValLoR = legalizeToReg(Val); |
2656 Variable *T = makeReg(IceType_i32); | 3111 Variable *T = makeReg(IceType_i32); |
2657 _rbit(T, ValLoR); | 3112 _rbit(T, ValLoR); |
2658 ValLoR = T; | 3113 ValLoR = T; |
2659 } | 3114 } |
2660 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); | 3115 lowerCLZ(Dest, ValLoR, ValHiR); |
2661 return; | 3116 return; |
2662 } | 3117 } |
2663 case Intrinsics::Fabs: { | 3118 case Intrinsics::Fabs: { |
2664 Variable *Dest = Instr->getDest(); | |
2665 Type DestTy = Dest->getType(); | 3119 Type DestTy = Dest->getType(); |
2666 Variable *T = makeReg(DestTy); | 3120 Variable *T = makeReg(DestTy); |
2667 if (isVectorType(DestTy)) { | 3121 if (isVectorType(DestTy)) { |
2668 // Add a fake def to keep liveness consistent in the meantime. | 3122 // Add a fake def to keep liveness consistent in the meantime. |
2669 Context.insert(InstFakeDef::create(Func, T)); | 3123 Context.insert(InstFakeDef::create(Func, T)); |
2670 _mov(Instr->getDest(), T); | 3124 _mov(Dest, T); |
2671 UnimplementedError(Func->getContext()->getFlags()); | 3125 UnimplementedError(Func->getContext()->getFlags()); |
2672 return; | 3126 return; |
2673 } | 3127 } |
2674 _vabs(T, legalizeToReg(Instr->getArg(0))); | 3128 _vabs(T, legalizeToReg(Instr->getArg(0))); |
2675 _mov(Dest, T); | 3129 _mov(Dest, T); |
2676 return; | 3130 return; |
2677 } | 3131 } |
2678 case Intrinsics::Longjmp: { | 3132 case Intrinsics::Longjmp: { |
2679 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); | 3133 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); |
2680 Call->addArg(Instr->getArg(0)); | 3134 Call->addArg(Instr->getArg(0)); |
(...skipping 33 matching lines...) | |
2714 Call->addArg(Instr->getArg(0)); | 3168 Call->addArg(Instr->getArg(0)); |
2715 Call->addArg(ValExt); | 3169 Call->addArg(ValExt); |
2716 Call->addArg(Instr->getArg(2)); | 3170 Call->addArg(Instr->getArg(2)); |
2717 lowerCall(Call); | 3171 lowerCall(Call); |
2718 return; | 3172 return; |
2719 } | 3173 } |
2720 case Intrinsics::NaClReadTP: { | 3174 case Intrinsics::NaClReadTP: { |
2721 if (Ctx->getFlags().getUseSandboxing()) { | 3175 if (Ctx->getFlags().getUseSandboxing()) { |
2722 UnimplementedError(Func->getContext()->getFlags()); | 3176 UnimplementedError(Func->getContext()->getFlags()); |
2723 } else { | 3177 } else { |
2724 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); | 3178 InstCall *Call = makeHelperCall(H_call_read_tp, Dest, 0); |
2725 lowerCall(Call); | 3179 lowerCall(Call); |
2726 } | 3180 } |
2727 return; | 3181 return; |
2728 } | 3182 } |
2729 case Intrinsics::Setjmp: { | 3183 case Intrinsics::Setjmp: { |
2730 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); | 3184 InstCall *Call = makeHelperCall(H_call_setjmp, Dest, 1); |
2731 Call->addArg(Instr->getArg(0)); | 3185 Call->addArg(Instr->getArg(0)); |
2732 lowerCall(Call); | 3186 lowerCall(Call); |
2733 return; | 3187 return; |
2734 } | 3188 } |
2735 case Intrinsics::Sqrt: { | 3189 case Intrinsics::Sqrt: { |
2736 Variable *Src = legalizeToReg(Instr->getArg(0)); | 3190 Variable *Src = legalizeToReg(Instr->getArg(0)); |
2737 Variable *Dest = Instr->getDest(); | |
2738 Variable *T = makeReg(Dest->getType()); | 3191 Variable *T = makeReg(Dest->getType()); |
2739 _vsqrt(T, Src); | 3192 _vsqrt(T, Src); |
2740 _mov(Dest, T); | 3193 _mov(Dest, T); |
2741 return; | 3194 return; |
2742 } | 3195 } |
2743 case Intrinsics::Stacksave: { | 3196 case Intrinsics::Stacksave: { |
2744 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 3197 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
2745 Variable *Dest = Instr->getDest(); | |
2746 _mov(Dest, SP); | 3198 _mov(Dest, SP); |
2747 return; | 3199 return; |
2748 } | 3200 } |
2749 case Intrinsics::Stackrestore: { | 3201 case Intrinsics::Stackrestore: { |
2750 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 3202 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
2751 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex); | 3203 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex); |
2752 _mov_redefined(SP, Val); | 3204 _mov_redefined(SP, Val); |
2753 return; | 3205 return; |
2754 } | 3206 } |
2755 case Intrinsics::Trap: | 3207 case Intrinsics::Trap: |
(...skipping 461 matching lines...) | |
3217 } | 3669 } |
3218 // If we didn't do address mode optimization, then we only have a base/offset | 3670 // If we didn't do address mode optimization, then we only have a base/offset |
3219 // to work with. ARM always requires a base register, so just use that to | 3671 // to work with. ARM always requires a base register, so just use that to |
3220 // hold the operand. | 3672 // hold the operand. |
3221 Variable *Base = legalizeToReg(Operand); | 3673 Variable *Base = legalizeToReg(Operand); |
3222 return OperandARM32Mem::create( | 3674 return OperandARM32Mem::create( |
3223 Func, Ty, Base, | 3675 Func, Ty, Base, |
3224 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); | 3676 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); |
3225 } | 3677 } |
3226 | 3678 |
3679 Variable64On32 *TargetARM32::makeI64RegPair() { | |
3680 Variable64On32 *Reg = | |
3681 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
3682 Reg->setMustHaveReg(); | |
3683 Reg->initHiLo(Func); | |
3684 Reg->getLo()->setMustNotHaveReg(); | |
3685 Reg->getHi()->setMustNotHaveReg(); | |
3686 return Reg; | |
3687 } | |
3688 | |
3227 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { | 3689 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { |
3228 // There aren't any 64-bit integer registers for ARM32. | 3690 // There aren't any 64-bit integer registers for ARM32. |
3229 assert(Type != IceType_i64); | 3691 assert(Type != IceType_i64); |
3230 Variable *Reg = Func->makeVariable(Type); | 3692 Variable *Reg = Func->makeVariable(Type); |
3231 if (RegNum == Variable::NoRegister) | 3693 if (RegNum == Variable::NoRegister) |
3232 Reg->setMustHaveReg(); | 3694 Reg->setMustHaveReg(); |
3233 else | 3695 else |
3234 Reg->setRegNum(RegNum); | 3696 Reg->setRegNum(RegNum); |
3235 return Reg; | 3697 return Reg; |
3236 } | 3698 } |
(...skipping 242 matching lines...) | |
3479 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 3941 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
3480 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 3942 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
3481 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 3943 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
3482 } | 3944 } |
3483 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 3945 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
3484 // However, for compatibility with current NaCl LLVM, don't claim that. | 3946 // However, for compatibility with current NaCl LLVM, don't claim that. |
3485 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 3947 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
3486 } | 3948 } |
3487 | 3949 |
3488 } // end of namespace Ice | 3950 } // end of namespace Ice |