Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1369333003: Subzero. Enable Atomics in ARM. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments; make format; git pull; merge. Created 5 years, 2 months ago
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 177 matching lines...)
188 I64PairRegisters[RegARM32::val] = isI64Pair; \ 188 I64PairRegisters[RegARM32::val] = isI64Pair; \
189 Float32Registers[RegARM32::val] = isFP32; \ 189 Float32Registers[RegARM32::val] = isFP32; \
190 Float64Registers[RegARM32::val] = isFP64; \ 190 Float64Registers[RegARM32::val] = isFP64; \
191 VectorRegisters[RegARM32::val] = isVec128; \ 191 VectorRegisters[RegARM32::val] = isVec128; \
192 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ 192 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \
193 for (SizeT RegAlias : alias_init) { \ 193 for (SizeT RegAlias : alias_init) { \
194 assert(!RegisterAliases[RegARM32::val][RegAlias] && \ 194 assert(!RegisterAliases[RegARM32::val][RegAlias] && \
195 "Duplicate alias for " #val); \ 195 "Duplicate alias for " #val); \
196 RegisterAliases[RegARM32::val].set(RegAlias); \ 196 RegisterAliases[RegARM32::val].set(RegAlias); \
197 } \ 197 } \
198 RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \
199 assert(RegisterAliases[RegARM32::val][RegARM32::val]); \ 198 assert(RegisterAliases[RegARM32::val][RegARM32::val]); \
200 ScratchRegs[RegARM32::val] = scratch; 199 ScratchRegs[RegARM32::val] = scratch;
201 REGARM32_TABLE; 200 REGARM32_TABLE;
202 #undef X 201 #undef X
203 TypeToRegisterSet[IceType_void] = InvalidRegisters; 202 TypeToRegisterSet[IceType_void] = InvalidRegisters;
204 TypeToRegisterSet[IceType_i1] = IntegerRegisters; 203 TypeToRegisterSet[IceType_i1] = IntegerRegisters;
205 TypeToRegisterSet[IceType_i8] = IntegerRegisters; 204 TypeToRegisterSet[IceType_i8] = IntegerRegisters;
206 TypeToRegisterSet[IceType_i16] = IntegerRegisters; 205 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
207 TypeToRegisterSet[IceType_i32] = IntegerRegisters; 206 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
208 TypeToRegisterSet[IceType_i64] = I64PairRegisters; 207 TypeToRegisterSet[IceType_i64] = I64PairRegisters;
209 TypeToRegisterSet[IceType_f32] = Float32Registers; 208 TypeToRegisterSet[IceType_f32] = Float32Registers;
210 TypeToRegisterSet[IceType_f64] = Float64Registers; 209 TypeToRegisterSet[IceType_f64] = Float64Registers;
211 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; 210 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
212 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; 211 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
213 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; 212 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
214 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; 213 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
215 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; 214 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
216 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; 215 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
217 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; 216 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
218 } 217 }
219 218
219 namespace {
220 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
221 for (Variable *Var : Vars) {
222 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
223 if (!Var64) {
224 // This is not the variable we are looking for.
225 continue;
226 }
227 assert(Var64->hasReg() || !Var64->mustHaveReg());
228 if (!Var64->hasReg()) {
229 continue;
230 }
231 SizeT FirstReg = RegARM32::getI64PairFirstGPRNum(Var->getRegNum());
232 // This assumes little endian.
233 Variable *Lo = Var64->getLo();
234 Variable *Hi = Var64->getHi();
235 assert(Lo->hasReg() == Hi->hasReg());
236 if (Lo->hasReg()) {
237 continue;
238 }
239 Lo->setRegNum(FirstReg);
240 Lo->setMustHaveReg();
241 Hi->setRegNum(FirstReg + 1);
242 Hi->setMustHaveReg();
243 }
244 }
245 } // end of anonymous namespace
246
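The pairing above assumes little endian: the low word of the 64-bit variable is pinned to the first (lower-numbered) register of the pair and the high word to the next one. A minimal plain-C++ sketch of that split, for illustration only (the function name is hypothetical, not Subzero API):

  #include <cstdint>
  #include <utility>

  // Splits a 64-bit value the same way the pass above pins Lo/Hi:
  // Lo gets the low 32 bits (first GPR), Hi gets the high 32 bits (second GPR).
  std::pair<uint32_t, uint32_t> splitI64(uint64_t V) {
    const uint32_t Lo = static_cast<uint32_t>(V);
    const uint32_t Hi = static_cast<uint32_t>(V >> 32);
    return {Lo, Hi};
  }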
220 void TargetARM32::translateO2() { 247 void TargetARM32::translateO2() {
221 TimerMarker T(TimerStack::TT_O2, Func); 248 TimerMarker T(TimerStack::TT_O2, Func);
222 249
223 // TODO(stichnot): share passes with X86? 250 // TODO(stichnot): share passes with X86?
224 // https://code.google.com/p/nativeclient/issues/detail?id=4094 251 // https://code.google.com/p/nativeclient/issues/detail?id=4094
225 252
226 if (!Ctx->getFlags().getPhiEdgeSplit()) { 253 if (!Ctx->getFlags().getPhiEdgeSplit()) {
227 // Lower Phi instructions. 254 // Lower Phi instructions.
228 Func->placePhiLoads(); 255 Func->placePhiLoads();
229 if (Func->hasError()) 256 if (Func->hasError())
(...skipping 47 matching lines...)
277 // Validate the live range computations. The expensive validation call is 304 // Validate the live range computations. The expensive validation call is
278 // deliberately only made when assertions are enabled. 305 // deliberately only made when assertions are enabled.
279 assert(Func->validateLiveness()); 306 assert(Func->validateLiveness());
280 // The post-codegen dump is done here, after liveness analysis and associated 307 // The post-codegen dump is done here, after liveness analysis and associated
281 // cleanup, to make the dump cleaner and more useful. 308 // cleanup, to make the dump cleaner and more useful.
282 Func->dump("After initial ARM32 codegen"); 309 Func->dump("After initial ARM32 codegen");
283 Func->getVMetadata()->init(VMK_All); 310 Func->getVMetadata()->init(VMK_All);
284 regAlloc(RAK_Global); 311 regAlloc(RAK_Global);
285 if (Func->hasError()) 312 if (Func->hasError())
286 return; 313 return;
314 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
287 Func->dump("After linear scan regalloc"); 315 Func->dump("After linear scan regalloc");
288 316
289 if (Ctx->getFlags().getPhiEdgeSplit()) { 317 if (Ctx->getFlags().getPhiEdgeSplit()) {
290 Func->advancedPhiLowering(); 318 Func->advancedPhiLowering();
291 Func->dump("After advanced Phi lowering"); 319 Func->dump("After advanced Phi lowering");
292 } 320 }
293 321
294 // Stack frame mapping. 322 // Stack frame mapping.
295 Func->genFrame(); 323 Func->genFrame();
296 if (Func->hasError()) 324 if (Func->hasError())
(...skipping 40 matching lines...)
337 Func->doArgLowering(); 365 Func->doArgLowering();
338 366
339 Func->genCode(); 367 Func->genCode();
340 if (Func->hasError()) 368 if (Func->hasError())
341 return; 369 return;
342 Func->dump("After initial ARM32 codegen"); 370 Func->dump("After initial ARM32 codegen");
343 371
344 regAlloc(RAK_InfOnly); 372 regAlloc(RAK_InfOnly);
345 if (Func->hasError()) 373 if (Func->hasError())
346 return; 374 return;
375 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
347 Func->dump("After regalloc of infinite-weight variables"); 376 Func->dump("After regalloc of infinite-weight variables");
348 377
349 Func->genFrame(); 378 Func->genFrame();
350 if (Func->hasError()) 379 if (Func->hasError())
351 return; 380 return;
352 Func->dump("After stack frame mapping"); 381 Func->dump("After stack frame mapping");
353 382
354 legalizeStackSlots(); 383 legalizeStackSlots();
355 if (Func->hasError()) 384 if (Func->hasError())
356 return; 385 return;
(...skipping 252 matching lines...)
609 // value from the stack slot. 638 // value from the stack slot.
610 if (Arg->hasReg()) { 639 if (Arg->hasReg()) {
611 assert(Ty != IceType_i64); 640 assert(Ty != IceType_i64);
612 // This should be simple, just load the parameter off the stack using a nice 641 // This should be simple, just load the parameter off the stack using a nice
613 // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for 642 // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for
614 // fp types, cannot have an index register), so we legalize the memory 643 // fp types, cannot have an index register), so we legalize the memory
615 // operand instead. 644 // operand instead.
616 auto *Mem = OperandARM32Mem::create( 645 auto *Mem = OperandARM32Mem::create(
617 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( 646 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
618 Ctx->getConstantInt32(Arg->getStackOffset()))); 647 Ctx->getConstantInt32(Arg->getStackOffset())));
619 legalizeToReg(Mem, Arg->getRegNum()); 648 _mov(Arg, legalizeToReg(Mem, Arg->getRegNum()));
620 // This argument-copying instruction uses an explicit OperandARM32Mem 649 // This argument-copying instruction uses an explicit OperandARM32Mem
621 // operand instead of a Variable, so its fill-from-stack operation has to 650 // operand instead of a Variable, so its fill-from-stack operation has to
622 // be tracked separately for statistics. 651 // be tracked separately for statistics.
623 Ctx->statsUpdateFills(); 652 Ctx->statsUpdateFills();
624 } 653 }
625 } 654 }
626 655
627 Type TargetARM32::stackSlotType() { return IceType_i32; } 656 Type TargetARM32::stackSlotType() { return IceType_i32; }
628 657
629 void TargetARM32::addProlog(CfgNode *Node) { 658 void TargetARM32::addProlog(CfgNode *Node) {
(...skipping 79 matching lines...)
709 if (UsesFramePointer) { 738 if (UsesFramePointer) {
710 CalleeSaves[RegARM32::Reg_fp] = true; 739 CalleeSaves[RegARM32::Reg_fp] = true;
711 assert(RegsUsed[RegARM32::Reg_fp] == false); 740 assert(RegsUsed[RegARM32::Reg_fp] == false);
712 RegsUsed[RegARM32::Reg_fp] = true; 741 RegsUsed[RegARM32::Reg_fp] = true;
713 } 742 }
714 if (!MaybeLeafFunc) { 743 if (!MaybeLeafFunc) {
715 CalleeSaves[RegARM32::Reg_lr] = true; 744 CalleeSaves[RegARM32::Reg_lr] = true;
716 RegsUsed[RegARM32::Reg_lr] = true; 745 RegsUsed[RegARM32::Reg_lr] = true;
717 } 746 }
718 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 747 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
748 if (RegARM32::isI64RegisterPair(i)) {
749 // We don't save register pairs explicitly. Instead, we rely on the code
750 // fake-defing/fake-using each register in the pair.
751 continue;
752 }
719 if (CalleeSaves[i] && RegsUsed[i]) { 753 if (CalleeSaves[i] && RegsUsed[i]) {
720 // TODO(jvoung): do separate vpush for each floating point register 754 // TODO(jvoung): do separate vpush for each floating point register
721 // segment and += 4, or 8 depending on type. 755 // segment and += 4, or 8 depending on type.
722 ++NumCallee; 756 ++NumCallee;
723 PreservedRegsSizeBytes += 4; 757 PreservedRegsSizeBytes += 4;
724 GPRsToPreserve.push_back(getPhysicalRegister(i)); 758 GPRsToPreserve.push_back(getPhysicalRegister(i));
725 } 759 }
726 } 760 }
727 Ctx->statsUpdateRegistersSaved(NumCallee); 761 Ctx->statsUpdateRegistersSaved(NumCallee);
728 if (!GPRsToPreserve.empty()) 762 if (!GPRsToPreserve.empty())
(...skipping 148 matching lines...)
877 // Consider FP and LR as callee-save / used as needed. 911 // Consider FP and LR as callee-save / used as needed.
878 if (UsesFramePointer) { 912 if (UsesFramePointer) {
879 CalleeSaves[RegARM32::Reg_fp] = true; 913 CalleeSaves[RegARM32::Reg_fp] = true;
880 } 914 }
881 if (!MaybeLeafFunc) { 915 if (!MaybeLeafFunc) {
882 CalleeSaves[RegARM32::Reg_lr] = true; 916 CalleeSaves[RegARM32::Reg_lr] = true;
883 } 917 }
884 // Pop registers in ascending order just like push (instead of in reverse 918 // Pop registers in ascending order just like push (instead of in reverse
885 // order). 919 // order).
886 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 920 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
921 if (RegARM32::isI64RegisterPair(i)) {
922 continue;
923 }
924
887 if (CalleeSaves[i] && RegsUsed[i]) { 925 if (CalleeSaves[i] && RegsUsed[i]) {
888 GPRsToRestore.push_back(getPhysicalRegister(i)); 926 GPRsToRestore.push_back(getPhysicalRegister(i));
889 } 927 }
890 } 928 }
891 if (!GPRsToRestore.empty()) 929 if (!GPRsToRestore.empty())
892 _pop(GPRsToRestore); 930 _pop(GPRsToRestore);
893 931
894 if (!Ctx->getFlags().getUseSandboxing()) 932 if (!Ctx->getFlags().getUseSandboxing())
895 return; 933 return;
896 934
(...skipping 835 matching lines...)
1732 Operand *Src0 = Inst->getSrc(0); 1770 Operand *Src0 = Inst->getSrc(0);
1733 assert(Dest->getType() == Src0->getType()); 1771 assert(Dest->getType() == Src0->getType());
1734 if (Dest->getType() == IceType_i64) { 1772 if (Dest->getType() == IceType_i64) {
1735 Src0 = legalizeUndef(Src0); 1773 Src0 = legalizeUndef(Src0);
1736 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 1774 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1737 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); 1775 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1738 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1776 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1739 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1777 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1740 Variable *T_Lo = makeReg(IceType_i32); 1778 Variable *T_Lo = makeReg(IceType_i32);
1741 Variable *T_Hi = makeReg(IceType_i32); 1779 Variable *T_Hi = makeReg(IceType_i32);
1780
1742 _mov(T_Lo, Src0Lo); 1781 _mov(T_Lo, Src0Lo);
1743 _mov(DestLo, T_Lo); 1782 _mov(DestLo, T_Lo);
1744 _mov(T_Hi, Src0Hi); 1783 _mov(T_Hi, Src0Hi);
1745 _mov(DestHi, T_Hi); 1784 _mov(DestHi, T_Hi);
1746 } else { 1785 } else {
1747 Operand *NewSrc; 1786 Operand *NewSrc;
1748 if (Dest->hasReg()) { 1787 if (Dest->hasReg()) {
1749 // If Dest already has a physical register, then legalize the Src operand 1788 // If Dest already has a physical register, then legalize the Src operand
1750 // into a Variable with the same register assignment. This especially 1789 // into a Variable with the same register assignment. This especially
1751 // helps allow the use of Flex operands. 1790 // helps allow the use of Flex operands.
(...skipping 512 matching lines...)
2264 case IceType_i64: { 2303 case IceType_i64: {
2265 // t0, t1 <- src0 2304 // t0, t1 <- src0
2266 // dest[31..0] = t0 2305 // dest[31..0] = t0
2267 // dest[63..32] = t1 2306 // dest[63..32] = t1
2268 assert(Src0->getType() == IceType_f64); 2307 assert(Src0->getType() == IceType_f64);
2269 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 2308 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2270 T->initHiLo(Func); 2309 T->initHiLo(Func);
2271 configureBitcastTemporary(T); 2310 configureBitcastTemporary(T);
2272 Variable *Src0R = legalizeToReg(Src0); 2311 Variable *Src0R = legalizeToReg(Src0);
2273 _mov(T, Src0R); 2312 _mov(T, Src0R);
2274 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); 2313 lowerAssign(InstAssign::create(Func, Dest, T));
2275 lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo()));
2276 lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi()));
2277 break; 2314 break;
2278 } 2315 }
2279 case IceType_f64: { 2316 case IceType_f64: {
2280 // T0 <- lo(src) 2317 // T0 <- lo(src)
2281 // T1 <- hi(src) 2318 // T1 <- hi(src)
2282 // vmov T2, T0, T1 2319 // vmov T2, T0, T1
2283 // Dest <- T2 2320 // Dest <- T2
2284 assert(Src0->getType() == IceType_i64); 2321 assert(Src0->getType() == IceType_i64);
2322 Variable *T = makeReg(DestType);
2285 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 2323 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2286 Src64->initHiLo(Func); 2324 Src64->initHiLo(Func);
2287 configureBitcastTemporary(Src64); 2325 configureBitcastTemporary(Src64);
2288 lowerAssign(InstAssign::create(Func, Src64, Src0)); 2326 lowerAssign(InstAssign::create(Func, Src64, Src0));
2289 Variable *T = makeReg(IceType_f64);
2290 _mov(T, Src64); 2327 _mov(T, Src64);
2291 lowerAssign(InstAssign::create(Func, Dest, T)); 2328 lowerAssign(InstAssign::create(Func, Dest, T));
2292 break; 2329 break;
2293 } 2330 }
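As a reference for the two scalar 64-bit cases above: a bitcast only reinterprets bits. A hedged plain-C++ model of that semantics (function name hypothetical; independent of the vmov-based lowering):

  #include <cstdint>
  #include <cstring>

  // Reinterpret the bits of an f64 as an i64; the lowering above achieves the
  // same effect with vmov through a 64-on-32 register pair.
  uint64_t bitcastF64ToI64(double D) {
    uint64_t I;
    static_assert(sizeof(I) == sizeof(D), "bitcast requires equal widths");
    std::memcpy(&I, &D, sizeof(I));
    return I;
  }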
2294 case IceType_v4i1: 2331 case IceType_v4i1:
2295 case IceType_v8i1: 2332 case IceType_v8i1:
2296 case IceType_v16i1: 2333 case IceType_v16i1:
2297 case IceType_v8i16: 2334 case IceType_v8i16:
2298 case IceType_v16i8: 2335 case IceType_v16i8:
2299 case IceType_v4f32: 2336 case IceType_v4f32:
(...skipping 230 matching lines...)
2530 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); 2567 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition()));
2531 _mov(Dest, T); 2568 _mov(Dest, T);
2532 return; 2569 return;
2533 } 2570 }
2534 2571
2535 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { 2572 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
2536 (void)Inst; 2573 (void)Inst;
2537 UnimplementedError(Func->getContext()->getFlags()); 2574 UnimplementedError(Func->getContext()->getFlags());
2538 } 2575 }
2539 2576
2577 namespace {
2578 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
2579 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
2580 return Integer->getValue();
2581 return Intrinsics::MemoryOrderInvalid;
2582 }
2583 } // end of anonymous namespace
2584
2585 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
2586 Operand *Ptr, Operand *Val) {
2587 // retry:
2588 // ldrex contents, [addr]
2589 // op tmp, contents, operand
2590 // strex success, tmp, [addr]
2591 // bne retry
2592 // fake-use(addr, operand) @ prevents undesirable clobbering.
2593 // mov dest, contents
2594 assert(Dest != nullptr);
2595 Type DestTy = Dest->getType();
2596 (void)Ptr;
2597 (void)Val;
2598
2599 OperandARM32Mem *Mem;
2600 Variable *PtrContentsReg;
2601 Variable *PtrContentsHiReg;
2602 Variable *PtrContentsLoReg;
2603 Variable *Value = Func->makeVariable(DestTy);
2604 Variable *ValueReg;
2605 Variable *ValueHiReg;
2606 Variable *ValueLoReg;
2607 Variable *Success = makeReg(IceType_i32);
2608 Variable *TmpReg;
2609 Variable *TmpHiReg;
2610 Variable *TmpLoReg;
2611 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2612 InstARM32Label *Retry = InstARM32Label::create(Func, this);
2613
2614 if (DestTy == IceType_i64) {
2615 Variable64On32 *PtrContentsReg64 = makeI64RegPair();
2616 PtrContentsHiReg = PtrContentsReg64->getHi();
2617 PtrContentsLoReg = PtrContentsReg64->getLo();
2618 PtrContentsReg = PtrContentsReg64;
2619
2620 llvm::cast<Variable64On32>(Value)->initHiLo(Func);
2621 Variable64On32 *ValueReg64 = makeI64RegPair();
2622 ValueHiReg = ValueReg64->getHi();
2623 ValueLoReg = ValueReg64->getLo();
2624 ValueReg = ValueReg64;
2625
2626 Variable64On32 *TmpReg64 = makeI64RegPair();
2627 TmpHiReg = TmpReg64->getHi();
2628 TmpLoReg = TmpReg64->getLo();
2629 TmpReg = TmpReg64;
2630 } else {
2631 PtrContentsReg = makeReg(DestTy);
2632 PtrContentsHiReg = nullptr;
2633 PtrContentsLoReg = PtrContentsReg;
2634
2635 ValueReg = makeReg(DestTy);
2636 ValueHiReg = nullptr;
2637 ValueLoReg = ValueReg;
2638
2639 TmpReg = makeReg(DestTy);
2640 TmpHiReg = nullptr;
2641 TmpLoReg = TmpReg;
2642 }
2643
2644 if (DestTy == IceType_i64) {
2645 Context.insert(InstFakeDef::create(Func, Value));
2646 }
2647 lowerAssign(InstAssign::create(Func, Value, Val));
2648
2649 Variable *PtrVar = Func->makeVariable(IceType_i32);
2650 lowerAssign(InstAssign::create(Func, PtrVar, Ptr));
2651
2652 _dmb();
2653 Context.insert(Retry);
2654 Mem = formMemoryOperand(PtrVar, DestTy);
2655 if (DestTy == IceType_i64) {
2656 Context.insert(InstFakeDef::create(Func, ValueReg, Value));
2657 }
2658 lowerAssign(InstAssign::create(Func, ValueReg, Value));
2659 if (DestTy == IceType_i8 || DestTy == IceType_i16) {
2660 _uxt(ValueReg, ValueReg);
2661 }
2662 _ldrex(PtrContentsReg, Mem);
2663
2664 if (DestTy == IceType_i64) {
2665 Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg));
2666 }
2667 switch (Operation) {
2668 default:
2669 Func->setError("Unknown AtomicRMW operation");
2670 return;
2671 case Intrinsics::AtomicAdd:
2672 if (DestTy == IceType_i64) {
2673 _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2674 _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2675 } else {
2676 _add(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2677 }
2678 break;
2679 case Intrinsics::AtomicSub:
2680 if (DestTy == IceType_i64) {
2681 _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2682 _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2683 } else {
2684 _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2685 }
2686 break;
2687 case Intrinsics::AtomicOr:
2688 _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2689 if (DestTy == IceType_i64) {
2690 _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2691 }
2692 break;
2693 case Intrinsics::AtomicAnd:
2694 _and(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2695 if (DestTy == IceType_i64) {
2696 _and(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2697 }
2698 break;
2699 case Intrinsics::AtomicXor:
2700 _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2701 if (DestTy == IceType_i64) {
2702 _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2703 }
2704 break;
2705 case Intrinsics::AtomicExchange:
2706 _mov(TmpLoReg, ValueLoReg);
2707 if (DestTy == IceType_i64) {
2708 _mov(TmpHiReg, ValueHiReg);
2709 }
2710 break;
2711 }
2712 _strex(Success, TmpReg, Mem);
2713 _cmp(Success, _0);
2714 _br(Retry, CondARM32::NE);
2715
2716 // The following fake-uses ensure that Subzero will not clobber them in the
2717 // load-linked/store-conditional loop above. We might have to spill them, but
2718 // spilling is preferable to incorrect behavior.
2719 Context.insert(InstFakeUse::create(Func, PtrVar));
2720 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
2721 Context.insert(InstFakeUse::create(Func, Value64->getHi()));
2722 Context.insert(InstFakeUse::create(Func, Value64->getLo()));
2723 } else {
2724 Context.insert(InstFakeUse::create(Func, Value));
2725 }
2726 _dmb();
2727 if (DestTy == IceType_i8 || DestTy == IceType_i16) {
2728 _uxt(PtrContentsReg, PtrContentsReg);
2729 }
2730
2731 if (DestTy == IceType_i64) {
2732 Context.insert(InstFakeUse::create(Func, PtrContentsReg));
2733 }
2734 lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg));
2735 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
2736 Context.insert(InstFakeUse::create(Func, Dest64->getLo()));
2737 Context.insert(InstFakeUse::create(Func, Dest64->getHi()));
2738 } else {
2739 Context.insert(InstFakeUse::create(Func, Dest));
2740 }
2741 }
2742
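For reference, the retry pattern sketched in the comment at the top of lowerAtomicRMW can be written as GCC-style ARMv7 inline assembly. The block below is an illustration of the ldrex/strex loop for a 32-bit AtomicAdd under those assumptions (function and operand names hypothetical), not the code Subzero emits:

  #include <cstdint>

  // dmb; retry: ldrex, add, strex, cmp, bne; dmb -- same shape as the lowering.
  uint32_t atomicAdd32(uint32_t *Addr, uint32_t Operand) {
    uint32_t Contents, Tmp, Success;
    __asm__ volatile("dmb   ish\n"
                     "1:\n\t"
                     "ldrex %[contents], [%[addr]]\n\t"
                     "add   %[tmp], %[contents], %[operand]\n\t"
                     "strex %[success], %[tmp], [%[addr]]\n\t"
                     "cmp   %[success], #0\n\t"
                     "bne   1b\n\t"
                     "dmb   ish"
                     : [contents] "=&r"(Contents), [tmp] "=&r"(Tmp),
                       [success] "=&r"(Success)
                     : [addr] "r"(Addr), [operand] "r"(Operand)
                     : "memory", "cc");
    return Contents; // like the intrinsic, yields the value seen before the add
  }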
2540 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 2743 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2541 switch (Instr->getIntrinsicInfo().ID) { 2744 Variable *Dest = Instr->getDest();
2745 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
2746 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
2747 switch (ID) {
2748 case Intrinsics::AtomicFence:
2749 case Intrinsics::AtomicFenceAll:
2750 assert(Dest == nullptr);
2751 _dmb();
2752 return;
2753 case Intrinsics::AtomicIsLockFree: {
2754 Operand *ByteSize = Instr->getArg(0);
2755 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
2756 if (CI == nullptr) {
2757 // The PNaCl ABI requires the byte size to be a compile-time constant.
2758 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2759 return;
2760 }
2761 static constexpr int32_t NotLockFree = 0;
2762 static constexpr int32_t LockFree = 1;
2763 int32_t Result = NotLockFree;
2764 switch (CI->getValue()) {
2765 case 1:
2766 case 2:
2767 case 4:
2768 case 8:
2769 Result = LockFree;
2770 break;
2771 }
2772 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
2773 return;
2774 }
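Restated as plain C++ (illustration only, name hypothetical), the mapping implemented above reports only 1-, 2-, 4- and 8-byte accesses as lock-free:

  #include <cstdint>

  // Matches the switch above: sizes 1, 2, 4, 8 -> 1 (lock-free), else 0.
  constexpr int32_t isLockFreeByteSize(int32_t ByteSize) {
    return (ByteSize == 1 || ByteSize == 2 || ByteSize == 4 || ByteSize == 8) ? 1 : 0;
  }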
2775 case Intrinsics::AtomicLoad: {
2776 assert(isScalarIntegerType(DestTy));
2777 // We require the memory address to be naturally aligned. Given that is the
2778 // case, then normal loads are atomic.
2779 if (!Intrinsics::isMemoryOrderValid(
2780 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
2781 Func->setError("Unexpected memory ordering for AtomicLoad");
2782 return;
2783 }
2784 Variable *T;
2785
2786 if (DestTy == IceType_i64) {
2787 // ldrex is the only ARM instruction that is guaranteed to load a 64-bit
2788 // integer atomically. Everything else works with a regular ldr.
2789 T = makeI64RegPair();
2790 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
2791 } else {
2792 T = makeReg(DestTy);
2793 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
2794 }
2795 _dmb();
2796 lowerAssign(InstAssign::create(Func, Dest, T));
2797 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2798 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
2799 // the FakeUse on the last-inserted instruction's dest.
2800 Context.insert(
2801 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2802 return;
2803 }
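As a reference model for the non-64-bit path (plain C++11, assuming an ARMv7 toolchain; not Subzero output), an aligned seq_cst load is an ordinary load followed by a barrier, matching the _ldr + _dmb sequence above:

  #include <atomic>
  #include <cstdint>

  // On ARMv7 this typically compiles to: ldr; dmb ish.
  uint32_t atomicLoad32(const std::atomic<uint32_t> *Addr) {
    return Addr->load(std::memory_order_seq_cst);
  }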
2804 case Intrinsics::AtomicStore: {
2805 // We require the memory address to be naturally aligned. Given that is the
2806 // case, then normal stores are atomic.
2807 if (!Intrinsics::isMemoryOrderValid(
2808 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
2809 Func->setError("Unexpected memory ordering for AtomicStore");
2810 return;
2811 }
2812 Operand *Value = Instr->getArg(0);
2813 Type ValueTy = Value->getType();
2814 assert(isScalarIntegerType(ValueTy));
2815 Operand *Addr = Instr->getArg(1);
2816
2817 if (ValueTy == IceType_i64) {
2818 // Atomic 64-bit stores require a load-linked/store-conditional loop using
2819 // ldrexd and strexd. The lowered code is:
2820 //
2821 // retry:
2822 // ldrexd t.lo, t.hi, [addr]
2823 // strexd success, value.lo, value.hi, [addr]
2824 // cmp success, #0
2825 // bne retry
2826 // fake-use(addr, value.lo, value.hi)
2827 //
2828 // The fake-use is needed to prevent those variables from being clobbered
2829 // in the loop (which will happen under register pressure.)
2830 Variable64On32 *Tmp = makeI64RegPair();
2831 Variable64On32 *ValueVar =
2832 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2833 Variable *AddrVar = makeReg(IceType_i32);
2834 Variable *Success = makeReg(IceType_i32);
2835 OperandARM32Mem *Mem;
2836 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2837 InstARM32Label *Retry = InstARM32Label::create(Func, this);
2838 Variable64On32 *NewReg = makeI64RegPair();
2839 ValueVar->initHiLo(Func);
2840 ValueVar->mustNotHaveReg();
2841
2842 _dmb();
2843 lowerAssign(InstAssign::create(Func, ValueVar, Value));
2844 lowerAssign(InstAssign::create(Func, AddrVar, Addr));
2845
2846 Context.insert(Retry);
2847 Context.insert(InstFakeDef::create(Func, NewReg));
2848 lowerAssign(InstAssign::create(Func, NewReg, ValueVar));
2849 Mem = formMemoryOperand(AddrVar, IceType_i64);
2850 _ldrex(Tmp, Mem);
2851 // This fake-use prevents the ldrex from being dead-code eliminated,
2852 // while also keeping liveness happy about all defs being used.
2853 Context.insert(
2854 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2855 _strex(Success, NewReg, Mem);
2856 _cmp(Success, _0);
2857 _br(Retry, CondARM32::NE);
2858
2859 Context.insert(InstFakeUse::create(Func, ValueVar->getLo()));
2860 Context.insert(InstFakeUse::create(Func, ValueVar->getHi()));
2861 Context.insert(InstFakeUse::create(Func, AddrVar));
2862 _dmb();
2863 return;
2864 }
2865 // Non-64-bit stores are atomic as long as the address is aligned. This
2866 // is PNaCl, so addresses are aligned.
2867 Variable *T = makeReg(ValueTy);
2868
2869 _dmb();
2870 lowerAssign(InstAssign::create(Func, T, Value));
2871 _str(T, formMemoryOperand(Addr, ValueTy));
2872 _dmb();
2873 return;
2874 }
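A reference model for the non-64-bit store path under the same assumptions (plain C++11, name hypothetical): barrier, plain store, barrier, as emitted above. The 64-bit path instead needs the ldrexd/strexd loop from the comment.

  #include <atomic>
  #include <cstdint>

  // On ARMv7 this typically compiles to: dmb ish; str; dmb ish.
  void atomicStore32(std::atomic<uint32_t> *Addr, uint32_t Value) {
    Addr->store(Value, std::memory_order_seq_cst);
  }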
2542 case Intrinsics::AtomicCmpxchg: { 2875 case Intrinsics::AtomicCmpxchg: {
2543 UnimplementedError(Func->getContext()->getFlags()); 2876 // The initial lowering for cmpxchg was:
2544 return; 2877 //
2545 } 2878 // retry:
2546 case Intrinsics::AtomicFence: 2879 // ldrex tmp, [addr]
2547 UnimplementedError(Func->getContext()->getFlags()); 2880 // cmp tmp, expected
2548 return; 2881 // mov expected, tmp
2549 case Intrinsics::AtomicFenceAll: 2882 // bne retry
2550 // NOTE: FenceAll should prevent and load/store from being moved across the 2883 // strex success, new, [addr]
2551 // fence (both atomic and non-atomic). The InstARM32Mfence instruction is 2884 // cmp success, #0
2552 // currently marked coarsely as "HasSideEffects". 2885 // bne retry
2553 UnimplementedError(Func->getContext()->getFlags()); 2886 // mov dest, expected
2554 return; 2887 //
2555 case Intrinsics::AtomicIsLockFree: { 2888 // Besides requiring two branches, that lowering could also potentially
2556 UnimplementedError(Func->getContext()->getFlags()); 2889 // write to memory (in mov expected, tmp) unless we were OK with increasing
2557 return; 2890 // the register pressure and requiring expected to be an infinite-weight
2558 } 2891 // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through
2559 case Intrinsics::AtomicLoad: { 2892 // careful rewriting, and thanks to predication, we now implement the
2560 UnimplementedError(Func->getContext()->getFlags()); 2893 // lowering as:
2561 return; 2894 //
2562 } 2895 // retry:
2563 case Intrinsics::AtomicRMW: 2896 // ldrex tmp, [addr]
2564 UnimplementedError(Func->getContext()->getFlags()); 2897 // cmp tmp, expected
2565 return; 2898 // strexeq success, new, [addr]
2566 case Intrinsics::AtomicStore: { 2899 // movne expected, tmp
2567 UnimplementedError(Func->getContext()->getFlags()); 2900 // cmpeq success, #0
2901 // bne retry
2902 // mov dest, expected
2903 //
2904 // Predication lets us move the strex ahead of the mov expected, tmp, which
2905 // allows tmp to be a non-infinite weight temporary. We wanted to avoid
2906 // writing to memory between ldrex and strex because, even though most times
2907 // that would cause no issues, if any interleaving memory write aliased
2908 // [addr] than we would have undefined behavior. Undefined behavior isn't
2909 // cool, so we try to avoid it. See the "Synchronization and semaphores"
2910 // section of the "ARM Architecture Reference Manual."
2911
2912 assert(isScalarIntegerType(DestTy));
2913 // We require the memory address to be naturally aligned. Given that is the
2914 // case, then normal loads are atomic.
2915 if (!Intrinsics::isMemoryOrderValid(
2916 ID, getConstantMemoryOrder(Instr->getArg(3)),
2917 getConstantMemoryOrder(Instr->getArg(4)))) {
2918 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
2919 return;
2920 }
2921
2922 OperandARM32Mem *Mem;
2923 Variable *TmpReg;
2924 Variable *Expected, *ExpectedReg;
2925 Variable *New, *NewReg;
2926 Variable *Success = makeReg(IceType_i32);
2927 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2928 InstARM32Label *Retry = InstARM32Label::create(Func, this);
2929
2930 if (DestTy == IceType_i64) {
2931 Variable64On32 *TmpReg64 = makeI64RegPair();
2932 Variable64On32 *New64 =
2933 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2934 Variable64On32 *NewReg64 = makeI64RegPair();
2935 Variable64On32 *Expected64 =
2936 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2937 Variable64On32 *ExpectedReg64 = makeI64RegPair();
2938
2939 New64->initHiLo(Func);
2940 New64->mustNotHaveReg();
2941 Expected64->initHiLo(Func);
2942 Expected64->mustNotHaveReg();
2943
2944 TmpReg = TmpReg64;
2945 New = New64;
2946 NewReg = NewReg64;
2947 Expected = Expected64;
2948 ExpectedReg = ExpectedReg64;
2949 } else {
2950 TmpReg = makeReg(DestTy);
2951 New = Func->makeVariable(DestTy);
2952 NewReg = makeReg(DestTy);
2953 Expected = Func->makeVariable(DestTy);
2954 ExpectedReg = makeReg(DestTy);
2955 }
2956
2957 Mem = formMemoryOperand(Instr->getArg(0), DestTy);
2958 if (DestTy == IceType_i64) {
2959 Context.insert(InstFakeDef::create(Func, Expected));
2960 }
2961 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
2962 if (DestTy == IceType_i64) {
2963 Context.insert(InstFakeDef::create(Func, New));
2964 }
2965 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
2966 _dmb();
2967
2968 Context.insert(Retry);
2969 if (DestTy == IceType_i64) {
2970 Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected));
2971 }
2972 lowerAssign(InstAssign::create(Func, ExpectedReg, Expected));
2973 if (DestTy == IceType_i64) {
2974 Context.insert(InstFakeDef::create(Func, NewReg, New));
2975 }
2976 lowerAssign(InstAssign::create(Func, NewReg, New));
2977
2978 _ldrex(TmpReg, Mem);
2979 Context.insert(
2980 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2981 if (DestTy == IceType_i64) {
2982 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
2983 auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg);
2984 // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's
2985 // keep liveness happy, shall we?
2986 Context.insert(InstFakeUse::create(Func, TmpReg));
2987 Context.insert(InstFakeUse::create(Func, ExpectedReg));
2988 _cmp(TmpReg64->getHi(), ExpectedReg64->getHi());
2989 _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ);
2990 } else {
2991 _cmp(TmpReg, ExpectedReg);
2992 }
2993 _strex(Success, NewReg, Mem, CondARM32::EQ);
2994 if (DestTy == IceType_i64) {
2995 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
2996 auto *Expected64 = llvm::cast<Variable64On32>(Expected);
2997 _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE);
2998 _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE);
2999 auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg);
3000 Context.insert(FakeDef);
3001 FakeDef->setDestRedefined();
3002 } else {
3003 _mov_redefined(Expected, TmpReg, CondARM32::NE);
3004 }
3005 _cmp(Success, _0, CondARM32::EQ);
3006 _br(Retry, CondARM32::NE);
3007 _dmb();
3008 lowerAssign(InstAssign::create(Func, Dest, Expected));
3009 Context.insert(InstFakeUse::create(Func, Expected));
3010 if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) {
3011 Context.insert(InstFakeUse::create(Func, New64->getLo()));
3012 Context.insert(InstFakeUse::create(Func, New64->getHi()));
3013 } else {
3014 Context.insert(InstFakeUse::create(Func, New));
3015 }
3016 return;
3017 }
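A hedged reference model for the semantics implemented above (plain C++11, name hypothetical; not the generated code): the intrinsic yields the value observed at the address, and the new value is stored only when that observation equals the expected value.

  #include <atomic>
  #include <cstdint>

  uint32_t cmpxchg32(std::atomic<uint32_t> *Addr, uint32_t Expected, uint32_t New) {
    // compare_exchange_strong updates Expected to the observed value on failure,
    // so Expected always holds what was read from Addr, like "mov dest, expected".
    Addr->compare_exchange_strong(Expected, New, std::memory_order_seq_cst);
    return Expected;
  }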
3018 case Intrinsics::AtomicRMW: {
3019 if (!Intrinsics::isMemoryOrderValid(
3020 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
3021 Func->setError("Unexpected memory ordering for AtomicRMW");
3022 return;
3023 }
3024 lowerAtomicRMW(
3025 Dest, static_cast<uint32_t>(
3026 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
3027 Instr->getArg(1), Instr->getArg(2));
2568 return; 3028 return;
2569 } 3029 }
2570 case Intrinsics::Bswap: { 3030 case Intrinsics::Bswap: {
2571 Variable *Dest = Instr->getDest();
2572 Operand *Val = Instr->getArg(0); 3031 Operand *Val = Instr->getArg(0);
2573 Type Ty = Val->getType(); 3032 Type Ty = Val->getType();
2574 if (Ty == IceType_i64) { 3033 if (Ty == IceType_i64) {
2575 Val = legalizeUndef(Val); 3034 Val = legalizeUndef(Val);
2576 Variable *Val_Lo = legalizeToReg(loOperand(Val)); 3035 Variable *Val_Lo = legalizeToReg(loOperand(Val));
2577 Variable *Val_Hi = legalizeToReg(hiOperand(Val)); 3036 Variable *Val_Hi = legalizeToReg(hiOperand(Val));
2578 Variable *T_Lo = makeReg(IceType_i32); 3037 Variable *T_Lo = makeReg(IceType_i32);
2579 Variable *T_Hi = makeReg(IceType_i32); 3038 Variable *T_Hi = makeReg(IceType_i32);
2580 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3039 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2581 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3040 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2582 _rev(T_Lo, Val_Lo); 3041 _rev(T_Lo, Val_Lo);
2583 _rev(T_Hi, Val_Hi); 3042 _rev(T_Hi, Val_Hi);
2584 _mov(DestLo, T_Hi); 3043 _mov(DestLo, T_Hi);
2585 _mov(DestHi, T_Lo); 3044 _mov(DestHi, T_Lo);
2586 } else { 3045 } else {
2587 assert(Ty == IceType_i32 || Ty == IceType_i16); 3046 assert(Ty == IceType_i32 || Ty == IceType_i16);
2588 Variable *ValR = legalizeToReg(Val); 3047 Variable *ValR = legalizeToReg(Val);
2589 Variable *T = makeReg(Ty); 3048 Variable *T = makeReg(Ty);
2590 _rev(T, ValR); 3049 _rev(T, ValR);
2591 if (Val->getType() == IceType_i16) { 3050 if (Val->getType() == IceType_i16) {
2592 Operand *Sixteen = 3051 Operand *Sixteen =
2593 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex); 3052 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
2594 _lsr(T, T, Sixteen); 3053 _lsr(T, T, Sixteen);
2595 } 3054 }
2596 _mov(Dest, T); 3055 _mov(Dest, T);
2597 } 3056 }
2598 return; 3057 return;
2599 } 3058 }
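A reference model for the i64 case above, assuming the GCC/Clang __builtin_bswap32 builtin (function name hypothetical): byte-reverse each 32-bit half, as rev does, and swap the halves when writing the destination.

  #include <cstdint>

  uint64_t bswap64(uint64_t V) {
    const uint32_t Lo = __builtin_bswap32(static_cast<uint32_t>(V));  // rev of low half
    const uint32_t Hi = __builtin_bswap32(static_cast<uint32_t>(V >> 32));  // rev of high half
    return (static_cast<uint64_t>(Lo) << 32) | Hi;  // halves swapped, like DestLo/DestHi above
  }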
2600 case Intrinsics::Ctpop: { 3059 case Intrinsics::Ctpop: {
2601 Variable *Dest = Instr->getDest();
2602 Operand *Val = Instr->getArg(0); 3060 Operand *Val = Instr->getArg(0);
2603 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) 3061 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
2604 ? H_call_ctpop_i32 3062 ? H_call_ctpop_i32
2605 : H_call_ctpop_i64, 3063 : H_call_ctpop_i64,
2606 Dest, 1); 3064 Dest, 1);
2607 Call->addArg(Val); 3065 Call->addArg(Val);
2608 lowerCall(Call); 3066 lowerCall(Call);
2609 // The popcount helpers always return 32-bit values, while the intrinsic's 3067 // The popcount helpers always return 32-bit values, while the intrinsic's
2610 // signature matches some 64-bit platform's native instructions and expects 3068 // signature matches some 64-bit platform's native instructions and expects
2611 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in 3069 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in
(...skipping 14 matching lines...)
2626 Operand *Val = Instr->getArg(0); 3084 Operand *Val = Instr->getArg(0);
2627 Variable *ValLoR; 3085 Variable *ValLoR;
2628 Variable *ValHiR = nullptr; 3086 Variable *ValHiR = nullptr;
2629 if (Val->getType() == IceType_i64) { 3087 if (Val->getType() == IceType_i64) {
2630 Val = legalizeUndef(Val); 3088 Val = legalizeUndef(Val);
2631 ValLoR = legalizeToReg(loOperand(Val)); 3089 ValLoR = legalizeToReg(loOperand(Val));
2632 ValHiR = legalizeToReg(hiOperand(Val)); 3090 ValHiR = legalizeToReg(hiOperand(Val));
2633 } else { 3091 } else {
2634 ValLoR = legalizeToReg(Val); 3092 ValLoR = legalizeToReg(Val);
2635 } 3093 }
2636 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); 3094 lowerCLZ(Dest, ValLoR, ValHiR);
2637 return; 3095 return;
2638 } 3096 }
2639 case Intrinsics::Cttz: { 3097 case Intrinsics::Cttz: {
2640 // Essentially like Clz, but reverse the bits first. 3098 // Essentially like Clz, but reverse the bits first.
2641 Operand *Val = Instr->getArg(0); 3099 Operand *Val = Instr->getArg(0);
2642 Variable *ValLoR; 3100 Variable *ValLoR;
2643 Variable *ValHiR = nullptr; 3101 Variable *ValHiR = nullptr;
2644 if (Val->getType() == IceType_i64) { 3102 if (Val->getType() == IceType_i64) {
2645 Val = legalizeUndef(Val); 3103 Val = legalizeUndef(Val);
2646 ValLoR = legalizeToReg(loOperand(Val)); 3104 ValLoR = legalizeToReg(loOperand(Val));
2647 ValHiR = legalizeToReg(hiOperand(Val)); 3105 ValHiR = legalizeToReg(hiOperand(Val));
2648 Variable *TLo = makeReg(IceType_i32); 3106 Variable *TLo = makeReg(IceType_i32);
2649 Variable *THi = makeReg(IceType_i32); 3107 Variable *THi = makeReg(IceType_i32);
2650 _rbit(TLo, ValLoR); 3108 _rbit(TLo, ValLoR);
2651 _rbit(THi, ValHiR); 3109 _rbit(THi, ValHiR);
2652 ValLoR = THi; 3110 ValLoR = THi;
2653 ValHiR = TLo; 3111 ValHiR = TLo;
2654 } else { 3112 } else {
2655 ValLoR = legalizeToReg(Val); 3113 ValLoR = legalizeToReg(Val);
2656 Variable *T = makeReg(IceType_i32); 3114 Variable *T = makeReg(IceType_i32);
2657 _rbit(T, ValLoR); 3115 _rbit(T, ValLoR);
2658 ValLoR = T; 3116 ValLoR = T;
2659 } 3117 }
2660 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); 3118 lowerCLZ(Dest, ValLoR, ValHiR);
2661 return; 3119 return;
2662 } 3120 }
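A reference model for the 32-bit case above, assuming the GCC/Clang __builtin_ctz builtin (function name hypothetical); the lowering here computes the same result with rbit followed by clz:

  #include <cstdint>

  uint32_t cttz32(uint32_t V) {
    if (V == 0)
      return 32;  // __builtin_ctz is undefined for zero
    return static_cast<uint32_t>(__builtin_ctz(V));
  }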
2663 case Intrinsics::Fabs: { 3121 case Intrinsics::Fabs: {
2664 Variable *Dest = Instr->getDest();
2665 Type DestTy = Dest->getType(); 3122 Type DestTy = Dest->getType();
2666 Variable *T = makeReg(DestTy); 3123 Variable *T = makeReg(DestTy);
2667 if (isVectorType(DestTy)) { 3124 if (isVectorType(DestTy)) {
2668 // Add a fake def to keep liveness consistent in the meantime. 3125 // Add a fake def to keep liveness consistent in the meantime.
2669 Context.insert(InstFakeDef::create(Func, T)); 3126 Context.insert(InstFakeDef::create(Func, T));
2670 _mov(Instr->getDest(), T); 3127 _mov(Dest, T);
2671 UnimplementedError(Func->getContext()->getFlags()); 3128 UnimplementedError(Func->getContext()->getFlags());
2672 return; 3129 return;
2673 } 3130 }
2674 _vabs(T, legalizeToReg(Instr->getArg(0))); 3131 _vabs(T, legalizeToReg(Instr->getArg(0)));
2675 _mov(Dest, T); 3132 _mov(Dest, T);
2676 return; 3133 return;
2677 } 3134 }
2678 case Intrinsics::Longjmp: { 3135 case Intrinsics::Longjmp: {
2679 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); 3136 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
2680 Call->addArg(Instr->getArg(0)); 3137 Call->addArg(Instr->getArg(0));
(...skipping 33 matching lines...)
2714 Call->addArg(Instr->getArg(0)); 3171 Call->addArg(Instr->getArg(0));
2715 Call->addArg(ValExt); 3172 Call->addArg(ValExt);
2716 Call->addArg(Instr->getArg(2)); 3173 Call->addArg(Instr->getArg(2));
2717 lowerCall(Call); 3174 lowerCall(Call);
2718 return; 3175 return;
2719 } 3176 }
2720 case Intrinsics::NaClReadTP: { 3177 case Intrinsics::NaClReadTP: {
2721 if (Ctx->getFlags().getUseSandboxing()) { 3178 if (Ctx->getFlags().getUseSandboxing()) {
2722 UnimplementedError(Func->getContext()->getFlags()); 3179 UnimplementedError(Func->getContext()->getFlags());
2723 } else { 3180 } else {
2724 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); 3181 InstCall *Call = makeHelperCall(H_call_read_tp, Dest, 0);
2725 lowerCall(Call); 3182 lowerCall(Call);
2726 } 3183 }
2727 return; 3184 return;
2728 } 3185 }
2729 case Intrinsics::Setjmp: { 3186 case Intrinsics::Setjmp: {
2730 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); 3187 InstCall *Call = makeHelperCall(H_call_setjmp, Dest, 1);
2731 Call->addArg(Instr->getArg(0)); 3188 Call->addArg(Instr->getArg(0));
2732 lowerCall(Call); 3189 lowerCall(Call);
2733 return; 3190 return;
2734 } 3191 }
2735 case Intrinsics::Sqrt: { 3192 case Intrinsics::Sqrt: {
2736 Variable *Src = legalizeToReg(Instr->getArg(0)); 3193 Variable *Src = legalizeToReg(Instr->getArg(0));
2737 Variable *Dest = Instr->getDest();
2738 Variable *T = makeReg(Dest->getType()); 3194 Variable *T = makeReg(Dest->getType());
2739 _vsqrt(T, Src); 3195 _vsqrt(T, Src);
2740 _mov(Dest, T); 3196 _mov(Dest, T);
2741 return; 3197 return;
2742 } 3198 }
2743 case Intrinsics::Stacksave: { 3199 case Intrinsics::Stacksave: {
2744 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 3200 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2745 Variable *Dest = Instr->getDest();
2746 _mov(Dest, SP); 3201 _mov(Dest, SP);
2747 return; 3202 return;
2748 } 3203 }
2749 case Intrinsics::Stackrestore: { 3204 case Intrinsics::Stackrestore: {
2750 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 3205 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2751 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex); 3206 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
2752 _mov_redefined(SP, Val); 3207 _mov_redefined(SP, Val);
2753 return; 3208 return;
2754 } 3209 }
2755 case Intrinsics::Trap: 3210 case Intrinsics::Trap:
(...skipping 461 matching lines...)
3217 } 3672 }
3218 // If we didn't do address mode optimization, then we only have a base/offset 3673 // If we didn't do address mode optimization, then we only have a base/offset
3219 // to work with. ARM always requires a base register, so just use that to 3674 // to work with. ARM always requires a base register, so just use that to
3220 // hold the operand. 3675 // hold the operand.
3221 Variable *Base = legalizeToReg(Operand); 3676 Variable *Base = legalizeToReg(Operand);
3222 return OperandARM32Mem::create( 3677 return OperandARM32Mem::create(
3223 Func, Ty, Base, 3678 Func, Ty, Base,
3224 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); 3679 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
3225 } 3680 }
3226 3681
3682 Variable64On32 *TargetARM32::makeI64RegPair() {
3683 Variable64On32 *Reg =
3684 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3685 Reg->setMustHaveReg();
3686 Reg->initHiLo(Func);
3687 Reg->getLo()->setMustNotHaveReg();
3688 Reg->getHi()->setMustNotHaveReg();
3689 return Reg;
3690 }
3691
3227 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { 3692 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
3228 // There aren't any 64-bit integer registers for ARM32. 3693 // There aren't any 64-bit integer registers for ARM32.
3229 assert(Type != IceType_i64); 3694 assert(Type != IceType_i64);
3230 Variable *Reg = Func->makeVariable(Type); 3695 Variable *Reg = Func->makeVariable(Type);
3231 if (RegNum == Variable::NoRegister) 3696 if (RegNum == Variable::NoRegister)
3232 Reg->setMustHaveReg(); 3697 Reg->setMustHaveReg();
3233 else 3698 else
3234 Reg->setRegNum(RegNum); 3699 Reg->setRegNum(RegNum);
3235 return Reg; 3700 return Reg;
3236 } 3701 }
(...skipping 242 matching lines...)
3479 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; 3944 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
3480 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 3945 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
3481 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; 3946 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
3482 } 3947 }
3483 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 3948 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
3484 // However, for compatibility with current NaCl LLVM, don't claim that. 3949 // However, for compatibility with current NaCl LLVM, don't claim that.
3485 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 3950 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
3486 } 3951 }
3487 3952
3488 } // end of namespace Ice 3953 } // end of namespace Ice