OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
11 /// This file implements the TargetLoweringARM32 class, which consists almost | 11 /// This file implements the TargetLoweringARM32 class, which consists almost |
12 /// entirely of the lowering sequence for each high-level instruction. | 12 /// entirely of the lowering sequence for each high-level instruction. |
13 /// | 13 /// |
14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
15 #include "IceTargetLoweringARM32.h" | 15 #include "IceTargetLoweringARM32.h" |
16 | 16 |
17 #include "IceCfg.h" | 17 #include "IceCfg.h" |
18 #include "IceCfgNode.h" | 18 #include "IceCfgNode.h" |
19 #include "IceClFlags.h" | 19 #include "IceClFlags.h" |
20 #include "IceDefs.h" | 20 #include "IceDefs.h" |
21 #include "IceELFObjectWriter.h" | 21 #include "IceELFObjectWriter.h" |
22 #include "IceGlobalInits.h" | 22 #include "IceGlobalInits.h" |
23 #include "IceInstARM32.def" | |
23 #include "IceInstARM32.h" | 24 #include "IceInstARM32.h" |
24 #include "IceLiveness.h" | 25 #include "IceLiveness.h" |
25 #include "IceOperand.h" | 26 #include "IceOperand.h" |
26 #include "IcePhiLoweringImpl.h" | 27 #include "IcePhiLoweringImpl.h" |
27 #include "IceRegistersARM32.h" | 28 #include "IceRegistersARM32.h" |
28 #include "IceTargetLoweringARM32.def" | 29 #include "IceTargetLoweringARM32.def" |
29 #include "IceUtils.h" | 30 #include "IceUtils.h" |
30 #include "llvm/Support/MathExtras.h" | 31 #include "llvm/Support/MathExtras.h" |
31 | 32 |
32 #include <algorithm> | 33 #include <algorithm> |
34 #include <utility> | |
33 | 35 |
34 namespace Ice { | 36 namespace Ice { |
35 | 37 |
36 namespace { | 38 namespace { |
37 | 39 |
38 // UnimplementedError is defined as a macro so that we can get actual line | 40 // UnimplementedError is defined as a macro so that we can get actual line |
39 // numbers. | 41 // numbers. |
40 #define UnimplementedError(Flags) \ | 42 #define UnimplementedError(Flags) \ |
41 do { \ | 43 do { \ |
42 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \ | 44 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \ |
(...skipping 330 matching lines...) | |
373 isFP32, isFP64, isVec128, alias_init) \ | 375 isFP32, isFP64, isVec128, alias_init) \ |
374 name, | 376 name, |
375 REGARM32_TABLE | 377 REGARM32_TABLE |
376 #undef X | 378 #undef X |
377 }; | 379 }; |
378 | 380 |
379 return RegNames[RegNum]; | 381 return RegNames[RegNum]; |
380 } | 382 } |
381 | 383 |
382 Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) { | 384 Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) { |
383 if (Ty == IceType_void) | 385 static const Type DefaultType[] = { |
384 Ty = IceType_i32; | 386 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ |
387 isFP32, isFP64, isVec128, alias_init) \ | |
388 (isFP32) \ | |
389 ? IceType_f32 \ | |
390 : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))), | |
391 REGARM32_TABLE | |
392 #undef X | |
393 }; | |
394 | |
395 assert(RegNum < RegARM32::Reg_NUM); | |
396 if (Ty == IceType_void) { | |
397 assert(RegNum < llvm::array_lengthof(DefaultType)); | |
398 Ty = DefaultType[RegNum]; | |
399 } | |
385 if (PhysicalRegisters[Ty].empty()) | 400 if (PhysicalRegisters[Ty].empty()) |
386 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); | 401 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); |
387 assert(RegNum < PhysicalRegisters[Ty].size()); | 402 assert(RegNum < PhysicalRegisters[Ty].size()); |
388 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 403 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
389 if (Reg == nullptr) { | 404 if (Reg == nullptr) { |
390 Reg = Func->makeVariable(Ty); | 405 Reg = Func->makeVariable(Ty); |
391 Reg->setRegNum(RegNum); | 406 Reg->setRegNum(RegNum); |
392 PhysicalRegisters[Ty][RegNum] = Reg; | 407 PhysicalRegisters[Ty][RegNum] = Reg; |
393 // Specially mark a named physical register as an "argument" so that it is | 408 // Specially mark a named physical register as an "argument" so that it is |
394 // considered live upon function entry. Otherwise it's possible to get | 409 // considered live upon function entry. Otherwise it's possible to get |
(...skipping 23 matching lines...) | |
418 llvm::report_fatal_error( | 433 llvm::report_fatal_error( |
419 "Infinite-weight Variable has no register assigned"); | 434 "Infinite-weight Variable has no register assigned"); |
420 } | 435 } |
421 int32_t Offset = Var->getStackOffset(); | 436 int32_t Offset = Var->getStackOffset(); |
422 int32_t BaseRegNum = Var->getBaseRegNum(); | 437 int32_t BaseRegNum = Var->getBaseRegNum(); |
423 if (BaseRegNum == Variable::NoRegister) { | 438 if (BaseRegNum == Variable::NoRegister) { |
424 BaseRegNum = getFrameOrStackReg(); | 439 BaseRegNum = getFrameOrStackReg(); |
425 if (!hasFramePointer()) | 440 if (!hasFramePointer()) |
426 Offset += getStackAdjustment(); | 441 Offset += getStackAdjustment(); |
427 } | 442 } |
428 if (!isLegalVariableStackOffset(Offset)) { | 443 const Type VarTy = Var->getType(); |
444 // In general, no Variable64On32 should be emitted in textual asm output. It |
445 // turns out that the lowering FakeDefs/FakeUses such variables. If they end |
Jim Stichnoth 2015/09/25 21:24:19: I think some words are missing from this sentence?

446 // up being assigned an illegal offset we get a runtime error. We liberally |
Jim Stichnoth 2015/09/25 21:24:19: liberally
John 2015/09/25 22:18:29: Done.

447 // allow Variable64On32 to have illegal offsets because offsets don't matter | |
448 // for FakeDefs/FakeUses. | |
449 if (!llvm::isa<Variable64On32>(Var) && | |
450 !isLegalVariableStackOffset(VarTy, Offset)) { | |
429 llvm::report_fatal_error("Illegal stack offset"); | 451 llvm::report_fatal_error("Illegal stack offset"); |
430 } | 452 } |
431 const Type FrameSPTy = stackSlotType(); | 453 Str << "[" << getRegName(BaseRegNum, VarTy); |
432 Str << "[" << getRegName(BaseRegNum, FrameSPTy); | |
433 if (Offset != 0) { | 454 if (Offset != 0) { |
434 Str << ", " << getConstantPrefix() << Offset; | 455 Str << ", " << getConstantPrefix() << Offset; |
435 } | 456 } |
436 Str << "]"; | 457 Str << "]"; |
437 } | 458 } |
438 | 459 |
439 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { | 460 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { |
440 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 461 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
441 return false; | 462 return false; |
442 int32_t RegLo, RegHi; | 463 int32_t RegLo, RegHi; |
(...skipping 142 matching lines...) | |
585 return; | 606 return; |
586 } | 607 } |
587 Type Ty = Arg->getType(); | 608 Type Ty = Arg->getType(); |
588 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); | 609 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); |
589 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 610 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
590 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 611 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
591 // If the argument variable has been assigned a register, we need to load the | 612 // If the argument variable has been assigned a register, we need to load the |
592 // value from the stack slot. | 613 // value from the stack slot. |
593 if (Arg->hasReg()) { | 614 if (Arg->hasReg()) { |
594 assert(Ty != IceType_i64); | 615 assert(Ty != IceType_i64); |
595 OperandARM32Mem *Mem = OperandARM32Mem::create( | 616 // This should be simple, just load the parameter off the stack using a nice |
617 // sp + imm addressing mode. Because ARM, we can't do that (e.g., VLDR, for | |
618 // fp types, cannot have an index register), so we legalize the memory | |
619 // operand instead. | |
620 auto *Mem = OperandARM32Mem::create( | |
596 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( | 621 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( |
597 Ctx->getConstantInt32(Arg->getStackOffset()))); | 622 Ctx->getConstantInt32(Arg->getStackOffset()))); |
598 if (isVectorType(Arg->getType())) { | 623 legalizeToReg(Mem, Arg->getRegNum()); |
599 // Use vld1.$elem or something? | |
600 UnimplementedError(Func->getContext()->getFlags()); | |
601 } else if (isFloatingType(Arg->getType())) { | |
602 _vldr(Arg, Mem); | |
603 } else { | |
604 _ldr(Arg, Mem); | |
605 } | |
606 // This argument-copying instruction uses an explicit OperandARM32Mem | 624 // This argument-copying instruction uses an explicit OperandARM32Mem |
607 // operand instead of a Variable, so its fill-from-stack operation has to | 625 // operand instead of a Variable, so its fill-from-stack operation has to |
608 // be tracked separately for statistics. | 626 // be tracked separately for statistics. |
609 Ctx->statsUpdateFills(); | 627 Ctx->statsUpdateFills(); |
610 } | 628 } |
611 } | 629 } |
612 | 630 |
613 Type TargetARM32::stackSlotType() { return IceType_i32; } | 631 Type TargetARM32::stackSlotType() { return IceType_i32; } |
614 | 632 |
615 void TargetARM32::addProlog(CfgNode *Node) { | 633 void TargetARM32::addProlog(CfgNode *Node) { |
(...skipping 271 matching lines...) | |
887 Variable *RetValue = nullptr; | 905 Variable *RetValue = nullptr; |
888 if (RI->getSrcSize()) | 906 if (RI->getSrcSize()) |
889 RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 907 RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
890 _bundle_lock(); | 908 _bundle_lock(); |
891 _bic(LR, LR, RetMask); | 909 _bic(LR, LR, RetMask); |
892 _ret(LR, RetValue); | 910 _ret(LR, RetValue); |
893 _bundle_unlock(); | 911 _bundle_unlock(); |
894 RI->setDeleted(); | 912 RI->setDeleted(); |
895 } | 913 } |
896 | 914 |
897 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { | 915 bool TargetARM32::isLegalVariableStackOffset(Type Ty, int32_t Offset) const { |
898 constexpr bool SignExt = false; | 916 constexpr bool SignExt = false; |
899 // TODO(jvoung): vldr of FP stack slots has a different limit from the plain | 917 return OperandARM32Mem::canHoldOffset(Ty, SignExt, Offset); |
900 // stackSlotType(). | |
901 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); | |
902 } | 918 } |
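Side note, not part of this patch: the legality check now takes a Type because classic A32 loads and stores encode immediate offsets differently per access kind. The sketch below is my own illustration of the usual limits, not the real OperandARM32Mem::canHoldOffset.

```cpp
// Typical A32 immediate-offset ranges (illustrative, simplified):
//   ldr/str, ldrb/strb : 12-bit immediate -> |offset| <= 4095
//   ldrh/ldrsh/ldrd    :  8-bit immediate -> |offset| <= 255
//   vldr/vstr (fp)     :  8-bit immediate scaled by 4 -> |offset| <= 1020,
//                         and the offset must be a multiple of 4
#include <cassert>
#include <cstdint>
#include <cstdlib>

static bool sketchCanHoldOffset(bool IsFP, unsigned WidthBytes, int32_t Off) {
  if (IsFP)
    return std::abs(Off) <= 1020 && (Off % 4) == 0; // vldr/vstr
  if (WidthBytes == 2 || WidthBytes == 8)
    return std::abs(Off) <= 255; // ldrh/ldrsh/ldrd
  return std::abs(Off) <= 4095;  // ldr/str, ldrb/strb
}

int main() {
  assert(sketchCanHoldOffset(false, 4, 4000)); // plain ldr can reach this slot
  assert(!sketchCanHoldOffset(true, 8, 4000)); // vldr.f64 cannot
  return 0;
}
```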
903 | 919 |
904 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, | 920 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, |
921 int32_t StackAdjust, | |
905 Variable *OrigBaseReg) { | 922 Variable *OrigBaseReg) { |
906 int32_t Offset = Var->getStackOffset(); | 923 int32_t Offset = Var->getStackOffset() + StackAdjust; |
907 // Legalize will likely need a movw/movt combination, but if the top bits are | 924 // Legalize will likely need a movw/movt combination, but if the top bits are |
908 // all 0 from negating the offset and subtracting, we could use that instead. | 925 // all 0 from negating the offset and subtracting, we could use that instead. |
909 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; | 926 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; |
910 if (ShouldSub) | 927 if (ShouldSub) |
911 Offset = -Offset; | 928 Offset = -Offset; |
912 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), | 929 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), |
913 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 930 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
914 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); | 931 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); |
915 if (ShouldSub) | 932 if (ShouldSub) |
916 _sub(ScratchReg, OrigBaseReg, OffsetVal); | 933 _sub(ScratchReg, OrigBaseReg, OffsetVal); |
(...skipping 13 matching lines...) | |
930 // to: | 947 // to: |
931 // movw/movt TMP, OFF_PART | 948 // movw/movt TMP, OFF_PART |
932 // add TMP, TMP, SP | 949 // add TMP, TMP, SP |
933 // ldr X, OFF_MORE[TMP] | 950 // ldr X, OFF_MORE[TMP] |
934 // | 951 // |
935 // This is safe because we have reserved TMP, and add for ARM does not | 952 // This is safe because we have reserved TMP, and add for ARM does not |
936 // clobber the flags register. | 953 // clobber the flags register. |
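To make the rewrite described above concrete, here is a small standalone illustration (values are made up, not taken from this CL): an offset too large for ldr's immediate field is split into a movw/movt-able base part plus a residual the load can still encode.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const int32_t SpillOffset = 70000;               // too big for ldr's +/-4095
  const int32_t Residual = SpillOffset & 0xFFF;    // still encodable by ldr
  const int32_t BasePart = SpillOffset - Residual; // materialized via movw/movt
  // Roughly:  movw/movt TMP, #BasePart
  //           add       TMP, TMP, SP
  //           ldr       X, [TMP, #Residual]
  assert(BasePart + Residual == SpillOffset);
  return 0;
}
```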
937 Func->dump("Before legalizeStackSlots"); | 954 Func->dump("Before legalizeStackSlots"); |
938 assert(hasComputedFrame()); | 955 assert(hasComputedFrame()); |
939 // Early exit, if SpillAreaSizeBytes is really small. | 956 // Early exit, if SpillAreaSizeBytes is really small. |
940 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) | 957 // TODO(jpp): this is not safe -- loads and stores of q registers can't have |
958 // offsets. | |
959 if (isLegalVariableStackOffset(IceType_v4i32, SpillAreaSizeBytes)) | |
941 return; | 960 return; |
942 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); | 961 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); |
943 int32_t StackAdjust = 0; | 962 int32_t StackAdjust = 0; |
944 // Do a fairly naive greedy clustering for now. Pick the first stack slot | 963 // Do a fairly naive greedy clustering for now. Pick the first stack slot |
945 // that's out of bounds and make a new base reg using the architecture's temp | 964 // that's out of bounds and make a new base reg using the architecture's temp |
946 // register. If that works for the next slot, then great. Otherwise, create a | 965 // register. If that works for the next slot, then great. Otherwise, create a |
947 // new base register, clobbering the previous base register. Never share a | 966 // new base register, clobbering the previous base register. Never share a |
948 // base reg across different basic blocks. This isn't ideal if local and | 967 // base reg across different basic blocks. This isn't ideal if local and |
949 // multi-block variables are far apart and their references are interspersed. | 968 // multi-block variables are far apart and their references are interspersed. |
950 // It may help to be more coordinated about assigning stack slot numbers and may | 969 // It may help to be more coordinated about assigning stack slot numbers and may |
(...skipping 20 matching lines...) | |
971 StackAdjust += AdjInst->getAmount(); | 990 StackAdjust += AdjInst->getAmount(); |
972 NewBaseOffset += AdjInst->getAmount(); | 991 NewBaseOffset += AdjInst->getAmount(); |
973 continue; | 992 continue; |
974 } | 993 } |
975 if (llvm::isa<InstARM32Call>(CurInstr)) { | 994 if (llvm::isa<InstARM32Call>(CurInstr)) { |
976 NewBaseOffset -= StackAdjust; | 995 NewBaseOffset -= StackAdjust; |
977 StackAdjust = 0; | 996 StackAdjust = 0; |
978 continue; | 997 continue; |
979 } | 998 } |
980 } | 999 } |
1000 | |
981 // For now, only Mov instructions can have stack variables. We need to | 1001 // For now, only Mov instructions can have stack variables. We need to |
982 // know the type of instruction because we currently create a fresh one | 1002 // know the type of instruction because we currently create a fresh one |
983 // to replace Dest/Source, rather than mutate in place. | 1003 // to replace Dest/Source, rather than mutate in place. |
984 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); | 1004 bool MayNeedOffsetRewrite = false; |
985 if (!MovInst) { | 1005 if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) { |
1006 MayNeedOffsetRewrite = | |
1007 !MovInstr->isMultiDest() && !MovInstr->isMultiSource(); | |
1008 } | |
1009 | |
1010 if (!MayNeedOffsetRewrite) { | |
986 continue; | 1011 continue; |
987 } | 1012 } |
1013 | |
1014 assert(Dest != nullptr); | |
1015 Type DestTy = Dest->getType(); | |
1016 assert(DestTy != IceType_i64); | |
988 if (!Dest->hasReg()) { | 1017 if (!Dest->hasReg()) { |
989 int32_t Offset = Dest->getStackOffset(); | 1018 int32_t Offset = Dest->getStackOffset(); |
990 Offset += StackAdjust; | 1019 Offset += StackAdjust; |
991 if (!isLegalVariableStackOffset(Offset)) { | 1020 if (!isLegalVariableStackOffset(DestTy, Offset)) { |
992 if (NewBaseReg) { | 1021 if (NewBaseReg) { |
993 int32_t OffsetDiff = Offset - NewBaseOffset; | 1022 int32_t OffsetDiff = Offset - NewBaseOffset; |
994 if (isLegalVariableStackOffset(OffsetDiff)) { | 1023 if (isLegalVariableStackOffset(DestTy, OffsetDiff)) { |
995 StackVariable *NewDest = | 1024 StackVariable *NewDest = |
996 Func->makeVariable<StackVariable>(stackSlotType()); | 1025 Func->makeVariable<StackVariable>(stackSlotType()); |
997 NewDest->setMustNotHaveReg(); | 1026 NewDest->setMustNotHaveReg(); |
998 NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum()); | 1027 NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum()); |
999 NewDest->setStackOffset(OffsetDiff); | 1028 NewDest->setStackOffset(OffsetDiff); |
1000 Variable *NewDestVar = NewDest; | 1029 Variable *NewDestVar = NewDest; |
1001 _mov(NewDestVar, MovInst->getSrc(0)); | 1030 _mov(NewDestVar, CurInstr->getSrc(0)); |
1002 MovInst->setDeleted(); | 1031 CurInstr->setDeleted(); |
1003 continue; | 1032 continue; |
1004 } | 1033 } |
1005 } | 1034 } |
1006 StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg); | 1035 StackVariable *LegalDest = |
1036 legalizeVariableSlot(Dest, StackAdjust, OrigBaseReg); | |
1007 assert(LegalDest != Dest); | 1037 assert(LegalDest != Dest); |
1008 Variable *LegalDestVar = LegalDest; | 1038 Variable *LegalDestVar = LegalDest; |
1009 _mov(LegalDestVar, MovInst->getSrc(0)); | 1039 _mov(LegalDestVar, CurInstr->getSrc(0)); |
1010 MovInst->setDeleted(); | 1040 CurInstr->setDeleted(); |
1011 NewBaseReg = LegalDest; | 1041 NewBaseReg = LegalDest; |
1012 NewBaseOffset = Offset; | 1042 NewBaseOffset = Offset; |
1013 continue; | 1043 continue; |
1014 } | 1044 } |
1015 } | 1045 } |
1016 assert(MovInst->getSrcSize() == 1); | 1046 assert(CurInstr->getSrcSize() == 1); |
1017 Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0)); | 1047 Variable *Var = llvm::dyn_cast<Variable>(CurInstr->getSrc(0)); |
1018 if (Var && !Var->hasReg()) { | 1048 if (Var && !Var->hasReg()) { |
1049 Type VarTy = Var->getType(); | |
1019 int32_t Offset = Var->getStackOffset(); | 1050 int32_t Offset = Var->getStackOffset(); |
1020 Offset += StackAdjust; | 1051 Offset += StackAdjust; |
1021 if (!isLegalVariableStackOffset(Offset)) { | 1052 if (!isLegalVariableStackOffset(VarTy, Offset)) { |
1022 if (NewBaseReg) { | 1053 if (NewBaseReg) { |
1023 int32_t OffsetDiff = Offset - NewBaseOffset; | 1054 int32_t OffsetDiff = Offset - NewBaseOffset; |
1024 if (isLegalVariableStackOffset(OffsetDiff)) { | 1055 if (isLegalVariableStackOffset(VarTy, OffsetDiff)) { |
1025 StackVariable *NewVar = | 1056 StackVariable *NewVar = |
1026 Func->makeVariable<StackVariable>(stackSlotType()); | 1057 Func->makeVariable<StackVariable>(stackSlotType()); |
1027 NewVar->setMustNotHaveReg(); | 1058 NewVar->setMustNotHaveReg(); |
1028 NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum()); | 1059 NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum()); |
1029 NewVar->setStackOffset(OffsetDiff); | 1060 NewVar->setStackOffset(OffsetDiff); |
1030 _mov(Dest, NewVar); | 1061 _mov(Dest, NewVar); |
1031 MovInst->setDeleted(); | 1062 CurInstr->setDeleted(); |
1032 continue; | 1063 continue; |
1033 } | 1064 } |
1034 } | 1065 } |
1035 StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg); | 1066 StackVariable *LegalVar = |
1067 legalizeVariableSlot(Var, StackAdjust, OrigBaseReg); | |
1036 assert(LegalVar != Var); | 1068 assert(LegalVar != Var); |
1037 _mov(Dest, LegalVar); | 1069 _mov(Dest, LegalVar); |
1038 MovInst->setDeleted(); | 1070 CurInstr->setDeleted(); |
1039 NewBaseReg = LegalVar; | 1071 NewBaseReg = LegalVar; |
1040 NewBaseOffset = Offset; | 1072 NewBaseOffset = Offset; |
1041 continue; | 1073 continue; |
1042 } | 1074 } |
1043 } | 1075 } |
1044 } | 1076 } |
1045 } | 1077 } |
1046 } | 1078 } |
1047 | 1079 |
1048 Operand *TargetARM32::loOperand(Operand *Operand) { | 1080 Operand *TargetARM32::loOperand(Operand *Operand) { |
(...skipping 371 matching lines...) | |
1420 _mul(T_Acc, Src0RLo, Src1RHi); | 1452 _mul(T_Acc, Src0RLo, Src1RHi); |
1421 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); | 1453 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); |
1422 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); | 1454 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); |
1423 _add(T_Hi, T_Hi1, T_Acc1); | 1455 _add(T_Hi, T_Hi1, T_Acc1); |
1424 _mov(DestLo, T_Lo); | 1456 _mov(DestLo, T_Lo); |
1425 _mov(DestHi, T_Hi); | 1457 _mov(DestHi, T_Hi); |
1426 return; | 1458 return; |
1427 } | 1459 } |
1428 case InstArithmetic::Shl: { | 1460 case InstArithmetic::Shl: { |
1429 // a=b<<c ==> | 1461 // a=b<<c ==> |
1462 // pnacl-llc does: | |
1463 // mov t_b.lo, b.lo | |
1464 // mov t_b.hi, b.hi | |
1465 // mov t_c.lo, c.lo | |
1466 // rsb T0, t_c.lo, #32 | |
1467 // lsr T1, t_b.lo, T0 | |
1468 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo | |
1469 // sub T2, t_c.lo, #32 | |
1470 // cmp T2, #0 | |
1471 // lslge t_a.hi, t_b.lo, T2 | |
1472 // lsl t_a.lo, t_b.lo, t_c.lo | |
1473 // mov a.lo, t_a.lo | |
1474 // mov a.hi, t_a.hi | |
1475 // | |
1430 // GCC 4.8 does: | 1476 // GCC 4.8 does: |
1431 // sub t_c1, c.lo, #32 | 1477 // sub t_c1, c.lo, #32 |
1432 // lsl t_hi, b.hi, c.lo | 1478 // lsl t_hi, b.hi, c.lo |
1433 // orr t_hi, t_hi, b.lo, lsl t_c1 | 1479 // orr t_hi, t_hi, b.lo, lsl t_c1 |
1434 // rsb t_c2, c.lo, #32 | 1480 // rsb t_c2, c.lo, #32 |
1435 // orr t_hi, t_hi, b.lo, lsr t_c2 | 1481 // orr t_hi, t_hi, b.lo, lsr t_c2 |
1436 // lsl t_lo, b.lo, c.lo | 1482 // lsl t_lo, b.lo, c.lo |
1437 // a.lo = t_lo | 1483 // a.lo = t_lo |
1438 // a.hi = t_hi | 1484 // a.hi = t_hi |
1485 // | |
1486 // These are incompatible, therefore we mimic pnacl-llc. | |
1439 // Can be strength-reduced for constant-shifts, but we don't do that for | 1487 // Can be strength-reduced for constant-shifts, but we don't do that for |
1440 // now. | 1488 // now. |
1441 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On | 1489 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On |
1442 // ARM, shifts only take the lower 8 bits of the shift register, and | 1490 // ARM, shifts only take the lower 8 bits of the shift register, and |
1443 // saturate to the range 0-32, so the negative value will saturate to 32. | 1491 // saturate to the range 0-32, so the negative value will saturate to 32. |
1444 Variable *T_Hi = makeReg(IceType_i32); | 1492 Constant *_32 = Ctx->getConstantInt32(32); |
1493 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
1445 Variable *Src1RLo = legalizeToReg(Src1Lo); | 1494 Variable *Src1RLo = legalizeToReg(Src1Lo); |
1446 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1495 Variable *T0 = makeReg(IceType_i32); |
1447 Variable *T_C1 = makeReg(IceType_i32); | 1496 Variable *T1 = makeReg(IceType_i32); |
1448 Variable *T_C2 = makeReg(IceType_i32); | 1497 Variable *T2 = makeReg(IceType_i32); |
1449 _sub(T_C1, Src1RLo, ThirtyTwo); | 1498 Variable *TA_Hi = makeReg(IceType_i32); |
1450 _lsl(T_Hi, Src0RHi, Src1RLo); | 1499 Variable *TA_Lo = makeReg(IceType_i32); |
1451 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1500 _rsb(T0, Src1RLo, _32); |
1452 OperandARM32::LSL, T_C1)); | 1501 _lsr(T1, Src0RLo, T0); |
1453 _rsb(T_C2, Src1RLo, ThirtyTwo); | 1502 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
1454 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1503 OperandARM32::LSL, Src1RLo)); |
1455 OperandARM32::LSR, T_C2)); | 1504 _sub(T2, Src1RLo, _32); |
1456 _mov(DestHi, T_Hi); | 1505 _cmp(T2, _0); |
1457 Variable *T_Lo = makeReg(IceType_i32); | 1506 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); |
1458 // _mov seems to sometimes have better register preferencing than lsl. | 1507 _set_dest_nonkillable(); |
1459 // Otherwise mov w/ lsl shifted register is a pseudo-instruction that | 1508 _lsl(TA_Lo, Src0RLo, Src1RLo); |
1460 // maps to lsl. | 1509 _mov(DestLo, TA_Lo); |
1461 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1510 _mov(DestHi, TA_Hi); |
1462 OperandARM32::LSL, Src1RLo)); | |
1463 _mov(DestLo, T_Lo); | |
1464 return; | 1511 return; |
1465 } | 1512 } |
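Not part of the CL, but as a sanity check of the pnacl-llc sequence mimicked above: the C++ sketch below simulates it on 32-bit halves, modeling ARM's treatment of register shift amounts >= 32 (LSL/LSR produce 0), and compares the result against a native 64-bit shift.

```cpp
#include <cassert>
#include <cstdint>

// ARM register-controlled LSL/LSR produce 0 once the amount reaches 32.
static uint32_t lsl32(uint32_t X, uint32_t N) { return N >= 32 ? 0 : X << N; }
static uint32_t lsr32(uint32_t X, uint32_t N) { return N >= 32 ? 0 : X >> N; }

static uint64_t shl64(uint32_t Lo, uint32_t Hi, uint32_t C) {
  uint32_t T0 = 32 - C;                      // rsb T0, c, #32
  uint32_t T1 = lsr32(Lo, T0);               // lsr T1, b.lo, T0
  uint32_t AHi = T1 | lsl32(Hi, C);          // orr t_a.hi, T1, t_b.hi, lsl c
  int32_t T2 = static_cast<int32_t>(C) - 32; // sub T2, c, #32 ; cmp T2, #0
  if (T2 >= 0)                               // lslge t_a.hi, t_b.lo, T2
    AHi = lsl32(Lo, static_cast<uint32_t>(T2));
  uint32_t ALo = lsl32(Lo, C);               // lsl t_a.lo, t_b.lo, c
  return (static_cast<uint64_t>(AHi) << 32) | ALo;
}

int main() {
  const uint64_t B = 0x123456789ABCDEF0ull;
  for (uint32_t C = 0; C < 64; ++C)
    assert(shl64(static_cast<uint32_t>(B), static_cast<uint32_t>(B >> 32), C) ==
           B << C);
  return 0;
}
```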
1466 case InstArithmetic::Lshr: | 1513 case InstArithmetic::Lshr: |
1467 // a=b>>c (unsigned) ==> | 1514 // a=b>>c (unsigned) ==> |
1515 // pnacl-llc does: | |
1516 // mov t_b.lo, b.lo | |
1517 // mov t_b.hi, b.hi | |
1518 // mov t_c.lo, c.lo | |
1519 // lsr T0, t_b.lo, t_c.lo | |
1520 // rsb T1, t_c.lo, #32 | |
1521 // orr t_a.lo, T0, t_b.hi, lsl T1 | |
1522 // sub T2, t_c.lo, #32 | |
1523 // cmp T2, #0 | |
1524 // lsrge t_a.lo, t_b.hi, T2 | |
1525 // lsr t_a.hi, t_b.hi, t_c.lo | |
1526 // a.lo = t_a.lo | |
1527 // a.hi = t_a.hi | |
1528 // | |
1468 // GCC 4.8 does: | 1529 // GCC 4.8 does: |
1469 // rsb t_c1, c.lo, #32 | 1530 // rsb t_c1, c.lo, #32 |
1470 // lsr t_lo, b.lo, c.lo | 1531 // lsr t_lo, b.lo, c.lo |
1471 // orr t_lo, t_lo, b.hi, lsl t_c1 | 1532 // orr t_lo, t_lo, b.hi, lsl t_c1 |
1472 // sub t_c2, c.lo, #32 | 1533 // sub t_c2, c.lo, #32 |
1473 // orr t_lo, t_lo, b.hi, lsr t_c2 | 1534 // orr t_lo, t_lo, b.hi, lsr t_c2 |
1474 // lsr t_hi, b.hi, c.lo | 1535 // lsr t_hi, b.hi, c.lo |
1475 // a.lo = t_lo | 1536 // a.lo = t_lo |
1476 // a.hi = t_hi | 1537 // a.hi = t_hi |
1538 // | |
1539 // These are incompatible, therefore we mimic pnacl-llc. | |
1477 case InstArithmetic::Ashr: { | 1540 case InstArithmetic::Ashr: { |
Jim Stichnoth 2015/09/25 21:24:19: Do you think there's an elegant way to move "case
John 2015/09/25 22:18:29: I tried my best. :)

1478 // a=b>>c (signed) ==> ... | 1541 // a=b>>c (signed) ==> ... |
1479 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the | 1542 // |
1480 // next orr should be conditioned on PLUS. The last two right shifts | 1543 // Ashr is similar, with _asr instead of _lsr. |
1481 // should also be arithmetic. | 1544 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; |
1482 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; | 1545 Constant *_32 = Ctx->getConstantInt32(32); |
1483 Variable *T_Lo = makeReg(IceType_i32); | 1546 Constant *_0 = Ctx->getConstantZero(IceType_i32); |
1484 Variable *Src1RLo = legalizeToReg(Src1Lo); | 1547 Variable *Src1RLo = legalizeToReg(Src1Lo); |
1485 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1548 Variable *T0 = makeReg(IceType_i32); |
1486 Variable *T_C1 = makeReg(IceType_i32); | 1549 Variable *T1 = makeReg(IceType_i32); |
1487 Variable *T_C2 = makeReg(IceType_i32); | 1550 Variable *T2 = makeReg(IceType_i32); |
1488 _rsb(T_C1, Src1RLo, ThirtyTwo); | 1551 Variable *TA_Lo = makeReg(IceType_i32); |
1489 _lsr(T_Lo, Src0RLo, Src1RLo); | 1552 Variable *TA_Hi = makeReg(IceType_i32); |
1490 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1553 _lsr(T0, Src0RLo, Src1RLo); |
1491 OperandARM32::LSL, T_C1)); | 1554 _rsb(T1, Src1RLo, _32); |
1492 OperandARM32::ShiftKind RShiftKind; | 1555 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
1493 CondARM32::Cond Pred; | 1556 OperandARM32::LSL, T1)); |
1557 _sub(T2, Src1RLo, _32); | |
1558 _cmp(T2, _0); | |
1494 if (IsAshr) { | 1559 if (IsAshr) { |
1495 _subs(T_C2, Src1RLo, ThirtyTwo); | 1560 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
1496 RShiftKind = OperandARM32::ASR; | 1561 _set_dest_nonkillable(); |
1497 Pred = CondARM32::PL; | 1562 _asr(TA_Hi, Src0RHi, Src1RLo); |
1498 } else { | 1563 } else { |
1499 _sub(T_C2, Src1RLo, ThirtyTwo); | 1564 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
1500 RShiftKind = OperandARM32::LSR; | 1565 _set_dest_nonkillable(); |
1501 Pred = CondARM32::AL; | 1566 _lsr(TA_Hi, Src0RHi, Src1RLo); |
1502 } | 1567 } |
1503 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1568 _mov(DestLo, TA_Lo); |
1504 RShiftKind, T_C2), | 1569 _mov(DestHi, TA_Hi); |
1505 Pred); | |
1506 _mov(DestLo, T_Lo); | |
1507 Variable *T_Hi = makeReg(IceType_i32); | |
1508 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
1509 RShiftKind, Src1RLo)); | |
1510 _mov(DestHi, T_Hi); | |
1511 return; | 1570 return; |
1512 } | 1571 } |
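The same kind of sketch (mine, not from the CL) for the right-shift sequences: ASR with an amount of 32 or more fills with the sign bit while LSR produces zero, which is what keeps the unconditional high-word shift correct for large amounts. It assumes arithmetic >> on signed values, as on ARM.

```cpp
#include <cassert>
#include <cstdint>

static uint32_t lsl32(uint32_t X, uint32_t N) { return N >= 32 ? 0 : X << N; }
static uint32_t lsr32(uint32_t X, uint32_t N) { return N >= 32 ? 0 : X >> N; }
static uint32_t asr32(uint32_t X, uint32_t N) {
  if (N >= 32)
    return static_cast<int32_t>(X) < 0 ? 0xFFFFFFFFu : 0u;
  return static_cast<uint32_t>(static_cast<int32_t>(X) >> N);
}

static uint64_t shr64(uint32_t Lo, uint32_t Hi, uint32_t C, bool Arith) {
  uint32_t T0 = lsr32(Lo, C);                // lsr T0, b.lo, c
  uint32_t T1 = 32 - C;                      // rsb T1, c, #32
  uint32_t ALo = T0 | lsl32(Hi, T1);         // orr t_a.lo, T0, t_b.hi, lsl T1
  int32_t T2 = static_cast<int32_t>(C) - 32; // sub T2, c, #32 ; cmp T2, #0
  if (T2 >= 0) {                             // lsrge/asrge t_a.lo, t_b.hi, T2
    uint32_t Amt = static_cast<uint32_t>(T2);
    ALo = Arith ? asr32(Hi, Amt) : lsr32(Hi, Amt);
  }
  uint32_t AHi = Arith ? asr32(Hi, C) : lsr32(Hi, C); // lsr/asr t_a.hi
  return (static_cast<uint64_t>(AHi) << 32) | ALo;
}

int main() {
  const uint64_t B = 0xF234567890ABCDEFull;
  for (uint32_t C = 0; C < 64; ++C) {
    assert(shr64(static_cast<uint32_t>(B), static_cast<uint32_t>(B >> 32), C,
                 false) == B >> C);
    assert(shr64(static_cast<uint32_t>(B), static_cast<uint32_t>(B >> 32), C,
                 true) == static_cast<uint64_t>(static_cast<int64_t>(B) >> C));
  }
  return 0;
}
```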
1513 case InstArithmetic::Fadd: | 1572 case InstArithmetic::Fadd: |
1514 case InstArithmetic::Fsub: | 1573 case InstArithmetic::Fsub: |
1515 case InstArithmetic::Fmul: | 1574 case InstArithmetic::Fmul: |
1516 case InstArithmetic::Fdiv: | 1575 case InstArithmetic::Fdiv: |
1517 case InstArithmetic::Frem: | 1576 case InstArithmetic::Frem: |
1518 llvm_unreachable("FP instruction with i64 type"); | 1577 llvm_unreachable("FP instruction with i64 type"); |
1519 return; | 1578 return; |
1520 case InstArithmetic::Udiv: | 1579 case InstArithmetic::Udiv: |
1521 case InstArithmetic::Sdiv: | 1580 case InstArithmetic::Sdiv: |
1522 case InstArithmetic::Urem: | 1581 case InstArithmetic::Urem: |
1523 case InstArithmetic::Srem: | 1582 case InstArithmetic::Srem: |
1524 llvm_unreachable("Call-helper-involved instruction for i64 type " | 1583 llvm_unreachable("Call-helper-involved instruction for i64 type " |
1525 "should have already been handled before"); | 1584 "should have already been handled before"); |
1526 return; | 1585 return; |
1527 } | 1586 } |
1528 return; | 1587 return; |
1529 } else if (isVectorType(Dest->getType())) { | 1588 } else if (isVectorType(Dest->getType())) { |
1589 // Add a fake def to keep liveness consistent in the meantime. | |
1590 Variable *T = makeReg(Dest->getType()); | |
1591 Context.insert(InstFakeDef::create(Func, T)); | |
1592 _mov(Dest, T); | |
1530 UnimplementedError(Func->getContext()->getFlags()); | 1593 UnimplementedError(Func->getContext()->getFlags()); |
1531 // Add a fake def to keep liveness consistent in the meantime. | |
1532 Context.insert(InstFakeDef::create(Func, Dest)); | |
1533 return; | 1594 return; |
1534 } | 1595 } |
1535 // Dest->getType() is a non-i64 scalar. | 1596 // Dest->getType() is a non-i64 scalar. |
1536 Variable *Src0R = legalizeToReg(Src0); | 1597 Variable *Src0R = legalizeToReg(Src0); |
1537 Variable *T = makeReg(Dest->getType()); | 1598 Variable *T = makeReg(Dest->getType()); |
1538 // Handle div/rem separately. They require a non-legalized Src1 to inspect | 1599 // Handle div/rem separately. They require a non-legalized Src1 to inspect |
1539 // whether or not Src1 is a non-zero constant. Once legalized it is more | 1600 // whether or not Src1 is a non-zero constant. Once legalized it is more |
1540 // difficult to determine (constant may be moved to a register). | 1601 // difficult to determine (constant may be moved to a register). |
1541 switch (Inst->getOp()) { | 1602 switch (Inst->getOp()) { |
1542 default: | 1603 default: |
(...skipping 35 matching lines...) | |
1578 } | 1639 } |
1579 | 1640 |
1580 // Handle floating point arithmetic separately: they require Src1 to be | 1641 // Handle floating point arithmetic separately: they require Src1 to be |
1581 // legalized to a register. | 1642 // legalized to a register. |
1582 switch (Inst->getOp()) { | 1643 switch (Inst->getOp()) { |
1583 default: | 1644 default: |
1584 break; | 1645 break; |
1585 case InstArithmetic::Fadd: { | 1646 case InstArithmetic::Fadd: { |
1586 Variable *Src1R = legalizeToReg(Src1); | 1647 Variable *Src1R = legalizeToReg(Src1); |
1587 _vadd(T, Src0R, Src1R); | 1648 _vadd(T, Src0R, Src1R); |
1588 _vmov(Dest, T); | 1649 _mov(Dest, T); |
1589 return; | 1650 return; |
1590 } | 1651 } |
1591 case InstArithmetic::Fsub: { | 1652 case InstArithmetic::Fsub: { |
1592 Variable *Src1R = legalizeToReg(Src1); | 1653 Variable *Src1R = legalizeToReg(Src1); |
1593 _vsub(T, Src0R, Src1R); | 1654 _vsub(T, Src0R, Src1R); |
1594 _vmov(Dest, T); | 1655 _mov(Dest, T); |
1595 return; | 1656 return; |
1596 } | 1657 } |
1597 case InstArithmetic::Fmul: { | 1658 case InstArithmetic::Fmul: { |
1598 Variable *Src1R = legalizeToReg(Src1); | 1659 Variable *Src1R = legalizeToReg(Src1); |
1599 _vmul(T, Src0R, Src1R); | 1660 _vmul(T, Src0R, Src1R); |
1600 _vmov(Dest, T); | 1661 _mov(Dest, T); |
1601 return; | 1662 return; |
1602 } | 1663 } |
1603 case InstArithmetic::Fdiv: { | 1664 case InstArithmetic::Fdiv: { |
1604 Variable *Src1R = legalizeToReg(Src1); | 1665 Variable *Src1R = legalizeToReg(Src1); |
1605 _vdiv(T, Src0R, Src1R); | 1666 _vdiv(T, Src0R, Src1R); |
1606 _vmov(Dest, T); | 1667 _mov(Dest, T); |
1607 return; | 1668 return; |
1608 } | 1669 } |
1609 } | 1670 } |
1610 | 1671 |
1611 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | 1672 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
1612 switch (Inst->getOp()) { | 1673 switch (Inst->getOp()) { |
1613 case InstArithmetic::_num: | 1674 case InstArithmetic::_num: |
1614 llvm_unreachable("Unknown arithmetic operator"); | 1675 llvm_unreachable("Unknown arithmetic operator"); |
1615 return; | 1676 return; |
1616 case InstArithmetic::Add: | 1677 case InstArithmetic::Add: |
(...skipping 53 matching lines...) | |
1670 void TargetARM32::lowerAssign(const InstAssign *Inst) { | 1731 void TargetARM32::lowerAssign(const InstAssign *Inst) { |
1671 Variable *Dest = Inst->getDest(); | 1732 Variable *Dest = Inst->getDest(); |
1672 Operand *Src0 = Inst->getSrc(0); | 1733 Operand *Src0 = Inst->getSrc(0); |
1673 assert(Dest->getType() == Src0->getType()); | 1734 assert(Dest->getType() == Src0->getType()); |
1674 if (Dest->getType() == IceType_i64) { | 1735 if (Dest->getType() == IceType_i64) { |
1675 Src0 = legalizeUndef(Src0); | 1736 Src0 = legalizeUndef(Src0); |
1676 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 1737 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
1677 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 1738 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
1678 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1739 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
1679 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1740 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1680 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 1741 Variable *T_Lo = makeReg(IceType_i32); |
1742 Variable *T_Hi = makeReg(IceType_i32); | |
1681 _mov(T_Lo, Src0Lo); | 1743 _mov(T_Lo, Src0Lo); |
1682 _mov(DestLo, T_Lo); | 1744 _mov(DestLo, T_Lo); |
1683 _mov(T_Hi, Src0Hi); | 1745 _mov(T_Hi, Src0Hi); |
1684 _mov(DestHi, T_Hi); | 1746 _mov(DestHi, T_Hi); |
1685 } else { | 1747 } else { |
1686 Operand *NewSrc; | 1748 Operand *NewSrc; |
1687 if (Dest->hasReg()) { | 1749 if (Dest->hasReg()) { |
1688 // If Dest already has a physical register, then legalize the Src operand | 1750 // If Dest already has a physical register, then legalize the Src operand |
1689 // into a Variable with the same register assignment. This especially | 1751 // into a Variable with the same register assignment. This especially |
1690 // helps allow the use of Flex operands. | 1752 // helps allow the use of Flex operands. |
1691 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | 1753 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); |
1692 } else { | 1754 } else { |
1693 // Dest could be a stack operand. Since we could potentially need to do a | 1755 // Dest could be a stack operand. Since we could potentially need to do a |
1694 // Store (and store can only have Register operands), legalize this to a | 1756 // Store (and store can only have Register operands), legalize this to a |
1695 // register. | 1757 // register. |
1696 NewSrc = legalize(Src0, Legal_Reg); | 1758 NewSrc = legalize(Src0, Legal_Reg); |
1697 } | 1759 } |
1698 if (isVectorType(Dest->getType())) { | 1760 if (isVectorType(Dest->getType())) { |
1699 UnimplementedError(Func->getContext()->getFlags()); | 1761 Variable *SrcR = legalizeToReg(NewSrc); |
1762 _mov(Dest, SrcR); | |
1700 } else if (isFloatingType(Dest->getType())) { | 1763 } else if (isFloatingType(Dest->getType())) { |
1701 Variable *SrcR = legalizeToReg(NewSrc); | 1764 Variable *SrcR = legalizeToReg(NewSrc); |
1702 _vmov(Dest, SrcR); | 1765 _mov(Dest, SrcR); |
1703 } else { | 1766 } else { |
1704 _mov(Dest, NewSrc); | 1767 _mov(Dest, NewSrc); |
1705 } | 1768 } |
1706 } | 1769 } |
1707 } | 1770 } |
1708 | 1771 |
1709 void TargetARM32::lowerBr(const InstBr *Inst) { | 1772 void TargetARM32::lowerBr(const InstBr *Inst) { |
1710 if (Inst->isUnconditional()) { | 1773 if (Inst->isUnconditional()) { |
1711 _br(Inst->getTargetUnconditional()); | 1774 _br(Inst->getTargetUnconditional()); |
1712 return; | 1775 return; |
(...skipping 49 matching lines...) | |
1762 if (CC.I32InReg(&Reg)) { | 1825 if (CC.I32InReg(&Reg)) { |
1763 InRegs = true; | 1826 InRegs = true; |
1764 GPRArgs.push_back(std::make_pair(Arg, Reg)); | 1827 GPRArgs.push_back(std::make_pair(Arg, Reg)); |
1765 } | 1828 } |
1766 } | 1829 } |
1767 | 1830 |
1768 if (!InRegs) { | 1831 if (!InRegs) { |
1769 ParameterAreaSizeBytes = | 1832 ParameterAreaSizeBytes = |
1770 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); | 1833 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); |
1771 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); | 1834 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); |
1772 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 1835 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty); |
1773 } | 1836 } |
1774 } | 1837 } |
1775 | 1838 |
1776 // Adjust the parameter area so that the stack is aligned. It is assumed that | 1839 // Adjust the parameter area so that the stack is aligned. It is assumed that |
1777 // the stack is already aligned at the start of the calling sequence. | 1840 // the stack is already aligned at the start of the calling sequence. |
1778 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 1841 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
1779 | 1842 |
1780 // Subtract the appropriate amount for the argument area. This also takes | 1843 // Subtract the appropriate amount for the argument area. This also takes |
1781 // care of setting the stack adjustment during emission. | 1844 // care of setting the stack adjustment during emission. |
1782 // | 1845 // |
(...skipping 19 matching lines...) | |
1802 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); | 1865 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); |
1803 } else { | 1866 } else { |
1804 Variable *NewBase = Func->makeVariable(SP->getType()); | 1867 Variable *NewBase = Func->makeVariable(SP->getType()); |
1805 lowerArithmetic( | 1868 lowerArithmetic( |
1806 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); | 1869 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); |
1807 Addr = formMemoryOperand(NewBase, Ty); | 1870 Addr = formMemoryOperand(NewBase, Ty); |
1808 } | 1871 } |
1809 lowerStore(InstStore::create(Func, StackArg.first, Addr)); | 1872 lowerStore(InstStore::create(Func, StackArg.first, Addr)); |
1810 } | 1873 } |
1811 | 1874 |
1812 // Copy arguments to be passed in registers to the appropriate registers. | |
1813 for (auto &GPRArg : GPRArgs) { | |
1814 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); | |
1815 // Generate a FakeUse of register arguments so that they do not get dead | |
1816 // code eliminated as a result of the FakeKill of scratch registers after | |
1817 // the call. | |
1818 Context.insert(InstFakeUse::create(Func, Reg)); | |
1819 } | |
1820 for (auto &FPArg : FPArgs) { | |
1821 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); | |
1822 Context.insert(InstFakeUse::create(Func, Reg)); | |
1823 } | |
1824 | |
1825 // Generate the call instruction. Assign its result to a temporary with high | 1875 // Generate the call instruction. Assign its result to a temporary with high |
1826 // register allocation weight. | 1876 // register allocation weight. |
1827 Variable *Dest = Instr->getDest(); | 1877 Variable *Dest = Instr->getDest(); |
1828 // ReturnReg doubles as ReturnRegLo as necessary. | 1878 // ReturnReg doubles as ReturnRegLo as necessary. |
1829 Variable *ReturnReg = nullptr; | 1879 Variable *ReturnReg = nullptr; |
1830 Variable *ReturnRegHi = nullptr; | 1880 Variable *ReturnRegHi = nullptr; |
1831 if (Dest) { | 1881 if (Dest) { |
1832 switch (Dest->getType()) { | 1882 switch (Dest->getType()) { |
1833 case IceType_NUM: | 1883 case IceType_NUM: |
1834 llvm_unreachable("Invalid Call dest type"); | 1884 llvm_unreachable("Invalid Call dest type"); |
(...skipping 30 matching lines...) | |
1865 Operand *CallTarget = Instr->getCallTarget(); | 1915 Operand *CallTarget = Instr->getCallTarget(); |
1866 // TODO(jvoung): Handle sandboxing. const bool NeedSandboxing = | 1916 // TODO(jvoung): Handle sandboxing. const bool NeedSandboxing = |
1867 // Ctx->getFlags().getUseSandboxing(); | 1917 // Ctx->getFlags().getUseSandboxing(); |
1868 | 1918 |
1869 // Allow ConstantRelocatable to be left alone as a direct call, but force | 1919 // Allow ConstantRelocatable to be left alone as a direct call, but force |
1870 // other constants like ConstantInteger32 to be in a register and make it an | 1920 // other constants like ConstantInteger32 to be in a register and make it an |
1871 // indirect call. | 1921 // indirect call. |
1872 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { | 1922 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { |
1873 CallTarget = legalize(CallTarget, Legal_Reg); | 1923 CallTarget = legalize(CallTarget, Legal_Reg); |
1874 } | 1924 } |
1925 | |
1926 // Copy arguments to be passed in registers to the appropriate registers. | |
1927 for (auto &FPArg : FPArgs) { | |
1928 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); | |
1929 Context.insert(InstFakeUse::create(Func, Reg)); | |
1930 } | |
1931 for (auto &GPRArg : GPRArgs) { | |
1932 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); | |
1933 // Generate a FakeUse of register arguments so that they do not get dead | |
1934 // code eliminated as a result of the FakeKill of scratch registers after | |
1935 // the call. | |
1936 Context.insert(InstFakeUse::create(Func, Reg)); | |
1937 } | |
1875 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); | 1938 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); |
1876 Context.insert(NewCall); | 1939 Context.insert(NewCall); |
1877 if (ReturnRegHi) | 1940 if (ReturnRegHi) |
1878 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1941 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
1879 | 1942 |
1880 // Add the appropriate offset to SP. The call instruction takes care of | 1943 // Add the appropriate offset to SP. The call instruction takes care of |
1881 // resetting the stack offset during emission. | 1944 // resetting the stack offset during emission. |
1882 if (ParameterAreaSizeBytes) { | 1945 if (ParameterAreaSizeBytes) { |
1883 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), | 1946 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
1884 Legal_Reg | Legal_Flex); | 1947 Legal_Reg | Legal_Flex); |
(...skipping 16 matching lines...) | |
1901 // Assign the result of the call to Dest. | 1964 // Assign the result of the call to Dest. |
1902 if (ReturnReg) { | 1965 if (ReturnReg) { |
1903 if (ReturnRegHi) { | 1966 if (ReturnRegHi) { |
1904 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | 1967 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
1905 Variable *DestLo = Dest64On32->getLo(); | 1968 Variable *DestLo = Dest64On32->getLo(); |
1906 Variable *DestHi = Dest64On32->getHi(); | 1969 Variable *DestHi = Dest64On32->getHi(); |
1907 _mov(DestLo, ReturnReg); | 1970 _mov(DestLo, ReturnReg); |
1908 _mov(DestHi, ReturnRegHi); | 1971 _mov(DestHi, ReturnRegHi); |
1909 } else { | 1972 } else { |
1910 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { | 1973 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { |
1911 _vmov(Dest, ReturnReg); | 1974 _mov(Dest, ReturnReg); |
1912 } else { | 1975 } else { |
1913 assert(isIntegerType(Dest->getType()) && | 1976 assert(isIntegerType(Dest->getType()) && |
1914 typeWidthInBytes(Dest->getType()) <= 4); | 1977 typeWidthInBytes(Dest->getType()) <= 4); |
1915 _mov(Dest, ReturnReg); | 1978 _mov(Dest, ReturnReg); |
1916 } | 1979 } |
1917 } | 1980 } |
1918 } | 1981 } |
1919 } | 1982 } |
1920 | 1983 |
1984 namespace { | |
1985 void forceHiLoInReg(Variable64On32 *Var) { | |
1986 Var->getHi()->setMustHaveReg(); | |
1987 Var->getLo()->setMustHaveReg(); | |
1988 } | |
1989 } // end of anonymous namespace | |
1990 | |
1921 void TargetARM32::lowerCast(const InstCast *Inst) { | 1991 void TargetARM32::lowerCast(const InstCast *Inst) { |
1922 InstCast::OpKind CastKind = Inst->getCastKind(); | 1992 InstCast::OpKind CastKind = Inst->getCastKind(); |
1923 Variable *Dest = Inst->getDest(); | 1993 Variable *Dest = Inst->getDest(); |
1924 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 1994 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
1925 switch (CastKind) { | 1995 switch (CastKind) { |
1926 default: | 1996 default: |
1927 Func->setError("Cast type not supported"); | 1997 Func->setError("Cast type not supported"); |
1928 return; | 1998 return; |
1929 case InstCast::Sext: { | 1999 case InstCast::Sext: { |
1930 if (isVectorType(Dest->getType())) { | 2000 if (isVectorType(Dest->getType())) { |
2001 Variable *T = makeReg(Dest->getType()); | |
2002 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); | |
2003 _mov(Dest, T); | |
1931 UnimplementedError(Func->getContext()->getFlags()); | 2004 UnimplementedError(Func->getContext()->getFlags()); |
1932 } else if (Dest->getType() == IceType_i64) { | 2005 } else if (Dest->getType() == IceType_i64) { |
1933 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 | 2006 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 |
1934 Constant *ShiftAmt = Ctx->getConstantInt32(31); | 2007 Constant *ShiftAmt = Ctx->getConstantInt32(31); |
1935 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2008 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
1936 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2009 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1937 Variable *T_Lo = makeReg(DestLo->getType()); | 2010 Variable *T_Lo = makeReg(DestLo->getType()); |
1938 if (Src0->getType() == IceType_i32) { | 2011 if (Src0->getType() == IceType_i32) { |
1939 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2012 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
1940 _mov(T_Lo, Src0RF); | 2013 _mov(T_Lo, Src0RF); |
(...skipping 30 matching lines...) | |
1971 // t1 = sxt src; dst = t1 | 2044 // t1 = sxt src; dst = t1 |
1972 Variable *Src0R = legalizeToReg(Src0); | 2045 Variable *Src0R = legalizeToReg(Src0); |
1973 Variable *T = makeReg(Dest->getType()); | 2046 Variable *T = makeReg(Dest->getType()); |
1974 _sxt(T, Src0R); | 2047 _sxt(T, Src0R); |
1975 _mov(Dest, T); | 2048 _mov(Dest, T); |
1976 } | 2049 } |
1977 break; | 2050 break; |
1978 } | 2051 } |
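A tiny check (not from the CL) of the "dst.hi = t1 asr #31" idiom used in the i64 branch above: arithmetically shifting the low word right by 31 replicates its sign bit across the entire high word (assumes arithmetic >> on signed values, as on ARM).

```cpp
#include <cassert>
#include <cstdint>

int main() {
  int32_t Lo = -5;
  uint32_t Hi = static_cast<uint32_t>(Lo >> 31); // asr #31 -> all sign bits
  uint64_t Wide =
      (static_cast<uint64_t>(Hi) << 32) | static_cast<uint32_t>(Lo);
  assert(Wide == static_cast<uint64_t>(int64_t{-5})); // sign-extended -5
  return 0;
}
```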
1979 case InstCast::Zext: { | 2052 case InstCast::Zext: { |
1980 if (isVectorType(Dest->getType())) { | 2053 if (isVectorType(Dest->getType())) { |
2054 Variable *T = makeReg(Dest->getType()); | |
2055 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); | |
2056 _mov(Dest, T); | |
1981 UnimplementedError(Func->getContext()->getFlags()); | 2057 UnimplementedError(Func->getContext()->getFlags()); |
1982 } else if (Dest->getType() == IceType_i64) { | 2058 } else if (Dest->getType() == IceType_i64) { |
1983 // t1=uxtb src; dst.lo=t1; dst.hi=0 | 2059 // t1=uxtb src; dst.lo=t1; dst.hi=0 |
1984 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2060 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1985 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2061 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
1986 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2062 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1987 Variable *T_Lo = makeReg(DestLo->getType()); | 2063 Variable *T_Lo = makeReg(DestLo->getType()); |
1988 // i32 and i1 can just take up the whole register. i32 doesn't need uxt, | 2064 // i32 and i1 can just take up the whole register. i32 doesn't need uxt, |
1989 // while i1 will have an and mask later anyway. | 2065 // while i1 will have an and mask later anyway. |
1990 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { | 2066 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { |
(...skipping 26 matching lines...) | |
2017 // t1 = uxt src; dst = t1 | 2093 // t1 = uxt src; dst = t1 |
2018 Variable *Src0R = legalizeToReg(Src0); | 2094 Variable *Src0R = legalizeToReg(Src0); |
2019 Variable *T = makeReg(Dest->getType()); | 2095 Variable *T = makeReg(Dest->getType()); |
2020 _uxt(T, Src0R); | 2096 _uxt(T, Src0R); |
2021 _mov(Dest, T); | 2097 _mov(Dest, T); |
2022 } | 2098 } |
2023 break; | 2099 break; |
2024 } | 2100 } |
2025 case InstCast::Trunc: { | 2101 case InstCast::Trunc: { |
2026 if (isVectorType(Dest->getType())) { | 2102 if (isVectorType(Dest->getType())) { |
2103 Variable *T = makeReg(Dest->getType()); | |
2104 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); | |
2105 _mov(Dest, T); | |
2027 UnimplementedError(Func->getContext()->getFlags()); | 2106 UnimplementedError(Func->getContext()->getFlags()); |
2028 } else { | 2107 } else { |
2029 if (Src0->getType() == IceType_i64) | 2108 if (Src0->getType() == IceType_i64) |
2030 Src0 = loOperand(Src0); | 2109 Src0 = loOperand(Src0); |
2031 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2110 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2032 // t1 = trunc Src0RF; Dest = t1 | 2111 // t1 = trunc Src0RF; Dest = t1 |
2033 Variable *T = makeReg(Dest->getType()); | 2112 Variable *T = makeReg(Dest->getType()); |
2034 _mov(T, Src0RF); | 2113 _mov(T, Src0RF); |
2035 if (Dest->getType() == IceType_i1) | 2114 if (Dest->getType() == IceType_i1) |
2036 _and(T, T, Ctx->getConstantInt1(1)); | 2115 _and(T, T, Ctx->getConstantInt1(1)); |
2037 _mov(Dest, T); | 2116 _mov(Dest, T); |
2038 } | 2117 } |
2039 break; | 2118 break; |
2040 } | 2119 } |
2041 case InstCast::Fptrunc: | 2120 case InstCast::Fptrunc: |
2042 case InstCast::Fpext: { | 2121 case InstCast::Fpext: { |
2043 // fptrunc: dest.f32 = fptrunc src0.fp64 | 2122 // fptrunc: dest.f32 = fptrunc src0.fp64 |
2044 // fpext: dest.f64 = fptrunc src0.fp32 | 2123 // fpext: dest.f64 = fptrunc src0.fp32 |
2045 const bool IsTrunc = CastKind == InstCast::Fptrunc; | 2124 const bool IsTrunc = CastKind == InstCast::Fptrunc; |
2046 if (isVectorType(Dest->getType())) { | 2125 if (isVectorType(Dest->getType())) { |
2126 Variable *T = makeReg(Dest->getType()); | |
2127 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); | |
2128 _mov(Dest, T); | |
2047 UnimplementedError(Func->getContext()->getFlags()); | 2129 UnimplementedError(Func->getContext()->getFlags()); |
2048 break; | 2130 break; |
2049 } | 2131 } |
2050 assert(Dest->getType() == (IsTrunc ? IceType_f32 : IceType_f64)); | 2132 assert(Dest->getType() == (IsTrunc ? IceType_f32 : IceType_f64)); |
2051 assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32)); | 2133 assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32)); |
2052 Variable *Src0R = legalizeToReg(Src0); | 2134 Variable *Src0R = legalizeToReg(Src0); |
2053 Variable *T = makeReg(Dest->getType()); | 2135 Variable *T = makeReg(Dest->getType()); |
2054 _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d); | 2136 _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d); |
2055 _mov(Dest, T); | 2137 _mov(Dest, T); |
2056 break; | 2138 break; |
2057 } | 2139 } |
2058 case InstCast::Fptosi: | 2140 case InstCast::Fptosi: |
2059 case InstCast::Fptoui: { | 2141 case InstCast::Fptoui: { |
2142 if (isVectorType(Dest->getType())) { | |
2143 Variable *T = makeReg(Dest->getType()); | |
2144 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); | |
2145 _mov(Dest, T); | |
2146 UnimplementedError(Func->getContext()->getFlags()); | |
2147 break; | |
2148 } | |
2149 | |
2150 const bool DestIsSigned = CastKind == InstCast::Fptosi; | |
2151 const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType()); | |
2152 if (llvm::isa<Variable64On32>(Dest)) { | |
2153 const char *HelperName = | |
2154 Src0IsF32 ? (DestIsSigned ? H_fptosi_f32_i64 : H_fptoui_f32_i64) | |
2155 : (DestIsSigned ? H_fptosi_f64_i64 : H_fptoui_f64_i64); | |
2156 static constexpr SizeT MaxSrcs = 1; | |
2157 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
2158 Call->addArg(Src0); | |
2159 lowerCall(Call); | |
2160 break; | |
2161 } | |
2060 // fptosi: | 2162 // fptosi: |
2061 // t1.fp = vcvt src0.fp | 2163 // t1.fp = vcvt src0.fp |
2062 // t2.i32 = vmov t1.fp | 2164 // t2.i32 = vmov t1.fp |
2063 // dest.int = conv t2.i32 @ Truncates the result if needed. | 2165 // dest.int = conv t2.i32 @ Truncates the result if needed. |
2064 // fptoui: | 2166 // fptoui: |
2065 // t1.fp = vcvt src0.fp | 2167 // t1.fp = vcvt src0.fp |
2066 // t2.u32 = vmov t1.fp | 2168 // t2.u32 = vmov t1.fp |
2067 // dest.uint = conv t2.u32 @ Truncates the result if needed. | 2169 // dest.uint = conv t2.u32 @ Truncates the result if needed. |
2068 if (isVectorType(Dest->getType())) { | |
2069 UnimplementedError(Func->getContext()->getFlags()); | |
2070 break; | |
2071 } | |
2072 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) { | |
2073 Context.insert(InstFakeDef::create(Func, Dest64On32->getLo())); | |
2074 Context.insert(InstFakeDef::create(Func, Dest64On32->getHi())); | |
2075 UnimplementedError(Func->getContext()->getFlags()); | |
2076 break; | |
2077 } | |
2078 const bool DestIsSigned = CastKind == InstCast::Fptosi; | |
2079 Variable *Src0R = legalizeToReg(Src0); | 2170 Variable *Src0R = legalizeToReg(Src0); |
2080 Variable *T_fp = makeReg(IceType_f32); | 2171 Variable *T_fp = makeReg(IceType_f32); |
2081 if (isFloat32Asserting32Or64(Src0->getType())) { | 2172 const InstARM32Vcvt::VcvtVariant Conversion = |
2082 _vcvt(T_fp, Src0R, | 2173 Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui) |
2083 DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui); | 2174 : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui); |
2084 } else { | 2175 _vcvt(T_fp, Src0R, Conversion); |
2085 _vcvt(T_fp, Src0R, | |
2086 DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui); | |
2087 } | |
2088 Variable *T = makeReg(IceType_i32); | 2176 Variable *T = makeReg(IceType_i32); |
2089 _vmov(T, T_fp); | 2177 _mov(T, T_fp); |
2090 if (Dest->getType() != IceType_i32) { | 2178 if (Dest->getType() != IceType_i32) { |
2091 Variable *T_1 = makeReg(Dest->getType()); | 2179 Variable *T_1 = makeReg(Dest->getType()); |
2092 lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T)); | 2180 lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T)); |
2093 T = T_1; | 2181 T = T_1; |
2094 } | 2182 } |
2095 _mov(Dest, T); | 2183 _mov(Dest, T); |
2096 break; | 2184 break; |
2097 } | 2185 } |
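One more side note on the scalar path above (mine, not from the CL): VCVT to integer rounds toward zero by default, so the result matches C++'s truncating float-to-int conversion; the new Variable64On32 branch calls a runtime helper because a single VCVT cannot produce a 64-bit integer.

```cpp
#include <cassert>

int main() {
  // What vcvt.s32.f32 / vcvt.u32.f32 would produce for these in-range inputs.
  assert(static_cast<int>(-2.7f) == -2); // truncation toward zero
  assert(static_cast<int>(2.7f) == 2);
  assert(static_cast<unsigned>(3.9f) == 3u);
  return 0;
}
```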
2098 case InstCast::Sitofp: | 2186 case InstCast::Sitofp: |
2099 case InstCast::Uitofp: { | 2187 case InstCast::Uitofp: { |
2188 if (isVectorType(Dest->getType())) { | |
2189 Variable *T = makeReg(Dest->getType()); | |
2190 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); | |
2191 _mov(Dest, T); | |
2192 UnimplementedError(Func->getContext()->getFlags()); | |
2193 break; | |
2194 } | |
2195 const bool SourceIsSigned = CastKind == InstCast::Sitofp; | |
2196 const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType()); | |
2197 if (Src0->getType() == IceType_i64) { | |
2198 const char *HelperName = | |
2199 DestIsF32 ? (SourceIsSigned ? H_sitofp_i64_f32 : H_uitofp_i64_f32) | |
2200 : (SourceIsSigned ? H_sitofp_i64_f64 : H_uitofp_i64_f64); | |
2201 static constexpr SizeT MaxSrcs = 1; | |
2202 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
2203 Call->addArg(Src0); | |
2204 lowerCall(Call); | |
2205 break; | |
2206 } | |
2100 // sitofp: | 2207 // sitofp: |
2101 // t1.i32 = sext src.int @ sign-extends src0 if needed. | 2208 // t1.i32 = sext src.int @ sign-extends src0 if needed. |
2102 // t2.fp32 = vmov t1.i32 | 2209 // t2.fp32 = vmov t1.i32 |
2103 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64 | 2210 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64 |
2104 // uitofp: | 2211 // uitofp: |
2105 // t1.i32 = zext src.int @ zero-extends src0 if needed. | 2212 // t1.i32 = zext src.int @ zero-extends src0 if needed. |
2106 // t2.fp32 = vmov t1.i32 | 2213 // t2.fp32 = vmov t1.i32 |
2107 // t3.fp = vcvt.{fp}.u32 @ fp is either f32 or f64 | 2214 // t3.fp = vcvt.{fp}.u32 @ fp is either f32 or f64 |
2108 if (isVectorType(Dest->getType())) { | |
2109 UnimplementedError(Func->getContext()->getFlags()); | |
2110 break; | |
2111 } | |
2112 if (Src0->getType() == IceType_i64) { | |
2113 // avoid cryptic liveness errors | |
2114 Context.insert(InstFakeDef::create(Func, Dest)); | |
2115 UnimplementedError(Func->getContext()->getFlags()); | |
2116 break; | |
2117 } | |
2118 const bool SourceIsSigned = CastKind == InstCast::Sitofp; | |
2119 if (Src0->getType() != IceType_i32) { | 2215 if (Src0->getType() != IceType_i32) { |
2120 Variable *Src0R_32 = makeReg(IceType_i32); | 2216 Variable *Src0R_32 = makeReg(IceType_i32); |
2121 lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext | 2217 lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext |
2122 : InstCast::Zext, | 2218 : InstCast::Zext, |
2123 Src0R_32, Src0)); | 2219 Src0R_32, Src0)); |
2124 Src0 = Src0R_32; | 2220 Src0 = Src0R_32; |
2125 } | 2221 } |
2126 Variable *Src0R = legalizeToReg(Src0); | 2222 Variable *Src0R = legalizeToReg(Src0); |
2127 Variable *Src0R_f32 = makeReg(IceType_f32); | 2223 Variable *Src0R_f32 = makeReg(IceType_f32); |
2128 _vmov(Src0R_f32, Src0R); | 2224 _mov(Src0R_f32, Src0R); |
2129 Src0R = Src0R_f32; | 2225 Src0R = Src0R_f32; |
2130 Variable *T = makeReg(Dest->getType()); | 2226 Variable *T = makeReg(Dest->getType()); |
2131 if (isFloat32Asserting32Or64(Dest->getType())) { | 2227 const InstARM32Vcvt::VcvtVariant Conversion = |
2132 _vcvt(T, Src0R, | 2228 DestIsF32 |
2133 SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s); | 2229 ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s) |
2134 } else { | 2230 : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d); |
2135 _vcvt(T, Src0R, | 2231 _vcvt(T, Src0R, Conversion); |
2136 SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d); | |
2137 } | |
2138 _mov(Dest, T); | 2232 _mov(Dest, T); |
2139 break; | 2233 break; |
2140 } | 2234 } |
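A comparable sketch (illustrative only, assuming the widening happens exactly as the comment above describes) of sitofp/uitofp for a sub-32-bit source: widen to 32 bits first, then let the 32-bit-to-float conversion do the rest.

#include <cstdint>

// Model of uitofp from i8 to f64: zero-extend to i32 (the zext step above),
// then convert the 32-bit value (vmov s, r ; vcvt.f64.u32).
static double uitofpI8ToF64Model(uint8_t Src) {
  uint32_t Wide = Src;              // zext i8 -> i32
  return static_cast<double>(Wide); // vmov s, r ; vcvt.f64.u32
}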
2141 case InstCast::Bitcast: { | 2235 case InstCast::Bitcast: { |
2142 Operand *Src0 = Inst->getSrc(0); | 2236 Operand *Src0 = Inst->getSrc(0); |
2143 if (Dest->getType() == Src0->getType()) { | 2237 if (Dest->getType() == Src0->getType()) { |
2144 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); | 2238 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); |
2145 lowerAssign(Assign); | 2239 lowerAssign(Assign); |
2146 return; | 2240 return; |
2147 } | 2241 } |
2148 Type DestType = Dest->getType(); | 2242 Type DestType = Dest->getType(); |
2149 switch (DestType) { | 2243 switch (DestType) { |
2150 case IceType_NUM: | 2244 case IceType_NUM: |
2151 case IceType_void: | 2245 case IceType_void: |
2152 llvm::report_fatal_error("Unexpected bitcast."); | 2246 llvm::report_fatal_error("Unexpected bitcast."); |
2153 case IceType_i1: | 2247 case IceType_i1: |
2154 UnimplementedError(Func->getContext()->getFlags()); | 2248 UnimplementedError(Func->getContext()->getFlags()); |
2155 break; | 2249 break; |
2156 case IceType_v4i1: | |
2157 UnimplementedError(Func->getContext()->getFlags()); | |
2158 break; | |
2159 case IceType_i8: | 2250 case IceType_i8: |
2160 UnimplementedError(Func->getContext()->getFlags()); | 2251 UnimplementedError(Func->getContext()->getFlags()); |
2161 break; | 2252 break; |
2162 case IceType_i16: | 2253 case IceType_i16: |
2163 UnimplementedError(Func->getContext()->getFlags()); | 2254 UnimplementedError(Func->getContext()->getFlags()); |
2164 break; | 2255 break; |
2165 case IceType_i32: | 2256 case IceType_i32: |
2166 case IceType_f32: { | 2257 case IceType_f32: { |
2167 Variable *Src0R = legalizeToReg(Src0); | 2258 Variable *Src0R = legalizeToReg(Src0); |
2168 Variable *T = makeReg(DestType); | 2259 Variable *T = makeReg(DestType); |
2169 _vmov(T, Src0R); | 2260 _mov(T, Src0R); |
2170 lowerAssign(InstAssign::create(Func, Dest, T)); | 2261 lowerAssign(InstAssign::create(Func, Dest, T)); |
2171 break; | 2262 break; |
2172 } | 2263 } |
2173 case IceType_i64: { | 2264 case IceType_i64: { |
2174 // t0, t1 <- src0 | 2265 // t0, t1 <- src0 |
2175 // dest[31..0] = t0 | 2266 // dest[31..0] = t0 |
2176 // dest[63..32] = t1 | 2267 // dest[63..32] = t1 |
2177 assert(Src0->getType() == IceType_f64); | 2268 assert(Src0->getType() == IceType_f64); |
2178 Variable *T0 = makeReg(IceType_i32); | 2269 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
2179 Variable *T1 = makeReg(IceType_i32); | 2270 T->initHiLo(Func); |
2271 forceHiLoInReg(T); | |
2180 Variable *Src0R = legalizeToReg(Src0); | 2272 Variable *Src0R = legalizeToReg(Src0); |
2181 _vmov(InstARM32Vmov::RegisterPair(T0, T1), Src0R); | 2273 _mov(T, Src0R); |
2274 Context.insert(InstFakeDef::create(Func, T->getLo())); | |
2275 Context.insert(InstFakeDef::create(Func, T->getHi())); | |
2182 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | 2276 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
2183 lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T0)); | 2277 lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo())); |
2184 lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T1)); | 2278 lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi())); |
2279 Context.insert(InstFakeUse::create(Func, T)); | |
2185 break; | 2280 break; |
2186 } | 2281 } |
2187 case IceType_f64: { | 2282 case IceType_f64: { |
2188 // T0 <- lo(src) | 2283 // T0 <- lo(src) |
2189 // T1 <- hi(src) | 2284 // T1 <- hi(src) |
2190 // vmov T2, T0, T1 | 2285 // vmov T2, T0, T1 |
2191 // Dest <- T2 | 2286 // Dest <- T2 |
2192 assert(Src0->getType() == IceType_i64); | 2287 assert(Src0->getType() == IceType_i64); |
2193 Variable *SrcLo = legalizeToReg(loOperand(Src0)); | 2288 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
2194 Variable *SrcHi = legalizeToReg(hiOperand(Src0)); | 2289 Src64->initHiLo(Func); |
2195 Variable *T = makeReg(IceType_f64); | 2290 forceHiLoInReg(Src64); |
2196 _vmov(T, InstARM32Vmov::RegisterPair(SrcLo, SrcHi)); | 2291 Variable *T = Src64->getLo(); |
2292 _mov(T, legalizeToReg(loOperand(Src0))); | |
2293 T = Src64->getHi(); | |
2294 _mov(T, legalizeToReg(hiOperand(Src0))); | |
2295 T = makeReg(IceType_f64); | |
2296 Context.insert(InstFakeDef::create(Func, Src64)); | |
2297 _mov(T, Src64); | |
2298 Context.insert(InstFakeUse::create(Func, Src64->getLo())); | |
2299 Context.insert(InstFakeUse::create(Func, Src64->getHi())); | |
2197 lowerAssign(InstAssign::create(Func, Dest, T)); | 2300 lowerAssign(InstAssign::create(Func, Dest, T)); |
2198 break; | 2301 break; |
2199 } | 2302 } |
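For the two bitcast cases above, a hedged host-side model (names are hypothetical; std::memcpy stands in for the vmov register moves) of reinterpreting an f64 as a pair of 32-bit words and back without changing any bits:

#include <cstdint>
#include <cstring>
#include <utility>

// f64 -> (lo, hi): the double's bit pattern is split into two 32-bit halves,
// matching "dest[31..0] = t0; dest[63..32] = t1" in the comments above.
static std::pair<uint32_t, uint32_t> bitcastF64ToI64PairModel(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return {static_cast<uint32_t>(Bits), static_cast<uint32_t>(Bits >> 32)};
}

// (lo, hi) -> f64: the inverse packing performed by "vmov T2, T0, T1".
static double bitcastI64PairToF64Model(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = (static_cast<uint64_t>(Hi) << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D;
}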
2303 case IceType_v4i1: | |
2200 case IceType_v8i1: | 2304 case IceType_v8i1: |
2305 case IceType_v16i1: | |
2306 case IceType_v8i16: | |
2307 case IceType_v16i8: | |
2308 case IceType_v4f32: | |
2309 case IceType_v4i32: { | |
2310 // avoid cryptic liveness errors | |
2311 Variable *T = makeReg(DestType); | |
2312 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0))); | |
2313 _mov(Dest, T); | |
2201 UnimplementedError(Func->getContext()->getFlags()); | 2314 UnimplementedError(Func->getContext()->getFlags()); |
2202 break; | 2315 break; |
2203 case IceType_v16i1: | 2316 } |
2204 UnimplementedError(Func->getContext()->getFlags()); | |
2205 break; | |
2206 case IceType_v8i16: | |
2207 UnimplementedError(Func->getContext()->getFlags()); | |
2208 break; | |
2209 case IceType_v16i8: | |
2210 UnimplementedError(Func->getContext()->getFlags()); | |
2211 break; | |
2212 case IceType_v4i32: | |
2213 // avoid cryptic liveness errors | |
2214 Context.insert(InstFakeDef::create(Func, Dest)); | |
2215 UnimplementedError(Func->getContext()->getFlags()); | |
2216 break; | |
2217 case IceType_v4f32: | |
2218 UnimplementedError(Func->getContext()->getFlags()); | |
2219 break; | |
2220 } | 2317 } |
2221 break; | 2318 break; |
2222 } | 2319 } |
2223 } | 2320 } |
2224 } | 2321 } |
2225 | 2322 |
2226 void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) { | 2323 void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) { |
2227 (void)Inst; | 2324 Variable *Dest = Inst->getDest(); |
2325 Type DestType = Dest->getType(); | |
2326 Variable *T = makeReg(DestType); | |
2327 Context.insert(InstFakeDef::create(Func, T)); | |
2328 _mov(Dest, T); | |
2228 UnimplementedError(Func->getContext()->getFlags()); | 2329 UnimplementedError(Func->getContext()->getFlags()); |
2229 } | 2330 } |
2230 | 2331 |
2231 namespace { | 2332 namespace { |
2232 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering | 2333 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering |
2233 // (and naming). | 2334 // (and naming). |
2234 enum { | 2335 enum { |
2235 #define X(val, CC0, CC1) _fcmp_ll_##val, | 2336 #define X(val, CC0, CC1) _fcmp_ll_##val, |
2236 FCMPARM32_TABLE | 2337 FCMPARM32_TABLE |
2237 #undef X | 2338 #undef X |
(...skipping 24 matching lines...)
2262 { CondARM32::CC0, CondARM32::CC1 } \ | 2363 { CondARM32::CC0, CondARM32::CC1 } \ |
2263 , | 2364 , |
2264 FCMPARM32_TABLE | 2365 FCMPARM32_TABLE |
2265 #undef X | 2366 #undef X |
2266 }; | 2367 }; |
2267 } // end of anonymous namespace | 2368 } // end of anonymous namespace |
2268 | 2369 |
2269 void TargetARM32::lowerFcmp(const InstFcmp *Inst) { | 2370 void TargetARM32::lowerFcmp(const InstFcmp *Inst) { |
2270 Variable *Dest = Inst->getDest(); | 2371 Variable *Dest = Inst->getDest(); |
2271 if (isVectorType(Dest->getType())) { | 2372 if (isVectorType(Dest->getType())) { |
2373 Variable *T = makeReg(Dest->getType()); | |
2374 Context.insert(InstFakeDef::create(Func, T)); | |
2375 _mov(Dest, T); | |
2272 UnimplementedError(Func->getContext()->getFlags()); | 2376 UnimplementedError(Func->getContext()->getFlags()); |
2273 return; | 2377 return; |
2274 } | 2378 } |
2275 | 2379 |
2276 Variable *Src0R = legalizeToReg(Inst->getSrc(0)); | 2380 Variable *Src0R = legalizeToReg(Inst->getSrc(0)); |
2277 Variable *Src1R = legalizeToReg(Inst->getSrc(1)); | 2381 Variable *Src1R = legalizeToReg(Inst->getSrc(1)); |
2278 Variable *T = makeReg(IceType_i32); | 2382 Variable *T = makeReg(IceType_i32); |
2279 _vcmp(Src0R, Src1R); | 2383 _vcmp(Src0R, Src1R); |
2280 _mov(T, Ctx->getConstantZero(IceType_i32)); | 2384 _mov(T, Ctx->getConstantZero(IceType_i32)); |
2281 _vmrs(); | 2385 _vmrs(); |
(...skipping 17 matching lines...)
2299 } | 2403 } |
2300 _mov(Dest, T); | 2404 _mov(Dest, T); |
2301 } | 2405 } |
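A sketch of the scalar fcmp pattern above (the exact condition codes come from the FCMPARM32_TABLE entries elided in the skipped lines; "ordered less than" is used here only as an example): materialize 0, then conditionally set 1 based on the FP flags copied to APSR by vmrs.

// Model of "vcmp src0, src1 ; mov t, #0 ; vmrs ; mov.<cc> t, #1 ; mov dest, t".
static int fcmpOltModel(float A, float B) {
  int T = 0;   // mov t, #0
  if (A < B)   // vcmp.f32 + vmrs establish the predicate
    T = 1;     // mov.<cc> t, #1
  return T;    // mov dest, t
}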
2302 | 2406 |
2303 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { | 2407 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
2304 Variable *Dest = Inst->getDest(); | 2408 Variable *Dest = Inst->getDest(); |
2305 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2409 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
2306 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 2410 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
2307 | 2411 |
2308 if (isVectorType(Dest->getType())) { | 2412 if (isVectorType(Dest->getType())) { |
2413 Variable *T = makeReg(Dest->getType()); | |
2414 Context.insert(InstFakeDef::create(Func, T)); | |
2415 _mov(Dest, T); | |
2309 UnimplementedError(Func->getContext()->getFlags()); | 2416 UnimplementedError(Func->getContext()->getFlags()); |
2310 return; | 2417 return; |
2311 } | 2418 } |
2312 | 2419 |
2313 // a=icmp cond, b, c ==> | 2420 // a=icmp cond, b, c ==> |
2314 // GCC does: | 2421 // GCC does: |
2315 // cmp b.hi, c.hi or cmp b.lo, c.lo | 2422 // cmp b.hi, c.hi or cmp b.lo, c.lo |
2316 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi | 2423 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
2317 // mov.<C1> t, #1 mov.<C1> t, #1 | 2424 // mov.<C1> t, #1 mov.<C1> t, #1 |
2318 // mov.<C2> t, #0 mov.<C2> t, #0 | 2425 // mov.<C2> t, #0 mov.<C2> t, #0 |
(...skipping 188 matching lines...)
2507 Call->addArg(Val); | 2614 Call->addArg(Val); |
2508 lowerCall(Call); | 2615 lowerCall(Call); |
2509 // The popcount helpers always return 32-bit values, while the intrinsic's | 2616 // The popcount helpers always return 32-bit values, while the intrinsic's |
2510 // signature matches some 64-bit platform's native instructions and expects | 2617 // signature matches some 64-bit platform's native instructions and expects |
2511 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in | 2618 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in |
2512 // case the user doesn't do that in the IR or doesn't toss the bits via | 2619 // case the user doesn't do that in the IR or doesn't toss the bits via |
2513 // truncate. | 2620 // truncate. |
2514 if (Val->getType() == IceType_i64) { | 2621 if (Val->getType() == IceType_i64) { |
2515 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2622 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2516 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2623 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2517 Variable *T = nullptr; | 2624 Variable *T = makeReg(Zero->getType()); |
2518 _mov(T, Zero); | 2625 _mov(T, Zero); |
2519 _mov(DestHi, T); | 2626 _mov(DestHi, T); |
2520 } | 2627 } |
2521 return; | 2628 return; |
2522 } | 2629 } |
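A rough model of why the upper half of a 64-bit ctpop destination is cleared explicitly above: the helper result is only 32 bits wide, so it is in effect zero-extended into the 64-bit destination. The function name is made up, and __builtin_popcountll is a GCC/Clang builtin used here only for brevity.

#include <cstdint>

// The count of set bits in a 64-bit value fits easily in 32 bits; the
// explicit "mov DestHi, 0" above is exactly this zero-extension.
static uint64_t ctpopI64Model(uint64_t V) {
  uint32_t Count = static_cast<uint32_t>(__builtin_popcountll(V));
  return static_cast<uint64_t>(Count); // upper 32 bits cleared
}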
2523 case Intrinsics::Ctlz: { | 2630 case Intrinsics::Ctlz: { |
2524 // The "is zero undef" parameter is ignored and we always return a | 2631 // The "is zero undef" parameter is ignored and we always return a |
2525 // well-defined value. | 2632 // well-defined value. |
2526 Operand *Val = Instr->getArg(0); | 2633 Operand *Val = Instr->getArg(0); |
2527 Variable *ValLoR; | 2634 Variable *ValLoR; |
(...skipping 26 matching lines...)
2554 } else { | 2661 } else { |
2555 ValLoR = legalizeToReg(Val); | 2662 ValLoR = legalizeToReg(Val); |
2556 Variable *T = makeReg(IceType_i32); | 2663 Variable *T = makeReg(IceType_i32); |
2557 _rbit(T, ValLoR); | 2664 _rbit(T, ValLoR); |
2558 ValLoR = T; | 2665 ValLoR = T; |
2559 } | 2666 } |
2560 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); | 2667 lowerCLZ(Instr->getDest(), ValLoR, ValHiR); |
2561 return; | 2668 return; |
2562 } | 2669 } |
2563 case Intrinsics::Fabs: { | 2670 case Intrinsics::Fabs: { |
2564 // Add a fake def to keep liveness consistent in the meantime. | 2671 Variable *Dest = Instr->getDest(); |
2565 Context.insert(InstFakeDef::create(Func, Instr->getDest())); | 2672 Type DestTy = Dest->getType(); |
2566 UnimplementedError(Func->getContext()->getFlags()); | 2673 Variable *T = makeReg(DestTy); |
2674 if (isVectorType(DestTy)) { | |
2675 // Add a fake def to keep liveness consistent in the meantime. | |
2676 Context.insert(InstFakeDef::create(Func, T)); | |
2677 _mov(Instr->getDest(), T); | |
2678 UnimplementedError(Func->getContext()->getFlags()); | |
2679 return; | |
2680 } | |
2681 _vabs(T, legalizeToReg(Instr->getArg(0))); | |
2682 _mov(Dest, T); | |
2567 return; | 2683 return; |
2568 } | 2684 } |
2569 case Intrinsics::Longjmp: { | 2685 case Intrinsics::Longjmp: { |
2570 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); | 2686 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); |
2571 Call->addArg(Instr->getArg(0)); | 2687 Call->addArg(Instr->getArg(0)); |
2572 Call->addArg(Instr->getArg(1)); | 2688 Call->addArg(Instr->getArg(1)); |
2573 lowerCall(Call); | 2689 lowerCall(Call); |
2574 return; | 2690 return; |
2575 } | 2691 } |
2576 case Intrinsics::Memcpy: { | 2692 case Intrinsics::Memcpy: { |
(...skipping 44 matching lines...)
2621 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); | 2737 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); |
2622 Call->addArg(Instr->getArg(0)); | 2738 Call->addArg(Instr->getArg(0)); |
2623 lowerCall(Call); | 2739 lowerCall(Call); |
2624 return; | 2740 return; |
2625 } | 2741 } |
2626 case Intrinsics::Sqrt: { | 2742 case Intrinsics::Sqrt: { |
2627 Variable *Src = legalizeToReg(Instr->getArg(0)); | 2743 Variable *Src = legalizeToReg(Instr->getArg(0)); |
2628 Variable *Dest = Instr->getDest(); | 2744 Variable *Dest = Instr->getDest(); |
2629 Variable *T = makeReg(Dest->getType()); | 2745 Variable *T = makeReg(Dest->getType()); |
2630 _vsqrt(T, Src); | 2746 _vsqrt(T, Src); |
2631 _vmov(Dest, T); | 2747 _mov(Dest, T); |
2632 return; | 2748 return; |
2633 } | 2749 } |
2634 case Intrinsics::Stacksave: { | 2750 case Intrinsics::Stacksave: { |
2635 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 2751 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
2636 Variable *Dest = Instr->getDest(); | 2752 Variable *Dest = Instr->getDest(); |
2637 _mov(Dest, SP); | 2753 _mov(Dest, SP); |
2638 return; | 2754 return; |
2639 } | 2755 } |
2640 case Intrinsics::Stackrestore: { | 2756 case Intrinsics::Stackrestore: { |
2641 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 2757 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
(...skipping 25 matching lines...)
2667 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | 2783 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
2668 _cmp(ValHiR, Zero); | 2784 _cmp(ValHiR, Zero); |
2669 Variable *T2 = makeReg(IceType_i32); | 2785 Variable *T2 = makeReg(IceType_i32); |
2670 _add(T2, T, ThirtyTwo); | 2786 _add(T2, T, ThirtyTwo); |
2671 _clz(T2, ValHiR, CondARM32::NE); | 2787 _clz(T2, ValHiR, CondARM32::NE); |
2672 // T2 is actually a source as well when the predicate is not AL (since it | 2788 // T2 is actually a source as well when the predicate is not AL (since it |
2673 // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness | 2789 // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness |
2674 // of T2 as if it was used as a source. | 2790 // of T2 as if it was used as a source. |
2675 _set_dest_nonkillable(); | 2791 _set_dest_nonkillable(); |
2676 _mov(DestLo, T2); | 2792 _mov(DestLo, T2); |
2677 Variable *T3 = nullptr; | 2793 Variable *T3 = makeReg(Zero->getType()); |
2678 _mov(T3, Zero); | 2794 _mov(T3, Zero); |
2679 _mov(DestHi, T3); | 2795 _mov(DestHi, T3); |
2680 return; | 2796 return; |
2681 } | 2797 } |
2682 _mov(Dest, T); | 2798 _mov(Dest, T); |
2683 return; | 2799 return; |
2684 } | 2800 } |
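A sketch of the 64-bit count-leading-zeros combination implemented by lowerCLZ above. The helper name is hypothetical, and __builtin_clz (undefined for 0, hence the explicit zero checks) stands in for the ARM clz instruction, which returns 32 for a zero input.

#include <cstdint>

// If the high word is nonzero its clz is the answer; otherwise the result is
// 32 plus the clz of the low word -- the "_add(T2, T, ThirtyTwo)" plus the
// predicated "_clz(T2, ValHiR, CondARM32::NE)" above express the same choice.
static uint32_t clz64Model(uint32_t Lo, uint32_t Hi) {
  if (Hi != 0)
    return static_cast<uint32_t>(__builtin_clz(Hi));
  if (Lo != 0)
    return 32 + static_cast<uint32_t>(__builtin_clz(Lo));
  return 64; // ARM clz of 0 is 32, so the lowered code also yields 64 here.
}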
2685 | 2801 |
2686 void TargetARM32::lowerLoad(const InstLoad *Load) { | 2802 void TargetARM32::lowerLoad(const InstLoad *Load) { |
2687 // A Load instruction can be treated the same as an Assign instruction, after | 2803 // A Load instruction can be treated the same as an Assign instruction, after |
(...skipping 39 matching lines...)
2727 Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0); | 2843 Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0); |
2728 Reg = S0; | 2844 Reg = S0; |
2729 } else if (Ty == IceType_f64) { | 2845 } else if (Ty == IceType_f64) { |
2730 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); | 2846 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); |
2731 Reg = D0; | 2847 Reg = D0; |
2732 } else if (isVectorType(Src0->getType())) { | 2848 } else if (isVectorType(Src0->getType())) { |
2733 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); | 2849 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); |
2734 Reg = Q0; | 2850 Reg = Q0; |
2735 } else { | 2851 } else { |
2736 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); | 2852 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); |
2737 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); | 2853 Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0); |
2854 _mov(Reg, Src0F, CondARM32::AL); | |
2738 } | 2855 } |
2739 } | 2856 } |
2740 // Add a ret instruction even if sandboxing is enabled, because addEpilog | 2857 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
2741 // explicitly looks for a ret instruction as a marker for where to insert the | 2858 // explicitly looks for a ret instruction as a marker for where to insert the |
2742 // frame removal instructions. addEpilog is responsible for restoring the | 2859 // frame removal instructions. addEpilog is responsible for restoring the |
2743 // "lr" register as needed prior to this ret instruction. | 2860 // "lr" register as needed prior to this ret instruction. |
2744 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); | 2861 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); |
2745 // Add a fake use of sp to make sure sp stays alive for the entire function. | 2862 // Add a fake use of sp to make sure sp stays alive for the entire function. |
2746 // Otherwise post-call sp adjustments get dead-code eliminated. | 2863 // Otherwise post-call sp adjustments get dead-code eliminated. |
2747 // TODO: Are there more places where the fake use should be inserted? E.g. | 2864 // TODO: Are there more places where the fake use should be inserted? E.g. |
2748 // "void f(int n){while(1) g(n);}" may not have a ret instruction. | 2865 // "void f(int n){while(1) g(n);}" may not have a ret instruction. |
2749 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 2866 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
2750 Context.insert(InstFakeUse::create(Func, SP)); | 2867 Context.insert(InstFakeUse::create(Func, SP)); |
2751 } | 2868 } |
2752 | 2869 |
2753 void TargetARM32::lowerSelect(const InstSelect *Inst) { | 2870 void TargetARM32::lowerSelect(const InstSelect *Inst) { |
2754 Variable *Dest = Inst->getDest(); | 2871 Variable *Dest = Inst->getDest(); |
2755 Type DestTy = Dest->getType(); | 2872 Type DestTy = Dest->getType(); |
2756 Operand *SrcT = Inst->getTrueOperand(); | 2873 Operand *SrcT = Inst->getTrueOperand(); |
2757 Operand *SrcF = Inst->getFalseOperand(); | 2874 Operand *SrcF = Inst->getFalseOperand(); |
2758 Operand *Condition = Inst->getCondition(); | 2875 Operand *Condition = Inst->getCondition(); |
2759 | 2876 |
2760 if (isVectorType(DestTy)) { | 2877 if (isVectorType(DestTy)) { |
2878 Variable *T = makeReg(DestTy); | |
2879 Context.insert(InstFakeDef::create(Func, T)); | |
2880 _mov(Dest, T); | |
2761 UnimplementedError(Func->getContext()->getFlags()); | 2881 UnimplementedError(Func->getContext()->getFlags()); |
2762 return; | 2882 return; |
2763 } | 2883 } |
2764 // TODO(jvoung): handle folding opportunities. | 2884 // TODO(jvoung): handle folding opportunities. |
2765 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t | 2885 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t |
2766 Variable *CmpOpnd0 = legalizeToReg(Condition); | 2886 Variable *CmpOpnd0 = legalizeToReg(Condition); |
2767 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32); | 2887 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32); |
2768 _cmp(CmpOpnd0, CmpOpnd1); | 2888 _cmp(CmpOpnd0, CmpOpnd1); |
2769 static constexpr CondARM32::Cond Cond = CondARM32::NE; | 2889 static constexpr CondARM32::Cond Cond = CondARM32::NE; |
2770 if (DestTy == IceType_i64) { | 2890 if (DestTy == IceType_i64) { |
2771 SrcT = legalizeUndef(SrcT); | 2891 SrcT = legalizeUndef(SrcT); |
2772 SrcF = legalizeUndef(SrcF); | 2892 SrcF = legalizeUndef(SrcF); |
2773 // Set the low portion. | 2893 // Set the low portion. |
2774 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2894 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2775 Variable *TLo = nullptr; | |
2776 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex); | 2895 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex); |
2896 Variable *TLo = makeReg(SrcFLo->getType()); | |
2777 _mov(TLo, SrcFLo); | 2897 _mov(TLo, SrcFLo); |
2778 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); | 2898 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); |
2779 _mov_nonkillable(TLo, SrcTLo, Cond); | 2899 _mov_nonkillable(TLo, SrcTLo, Cond); |
2780 _mov(DestLo, TLo); | 2900 _mov(DestLo, TLo); |
2781 // Set the high portion. | 2901 // Set the high portion. |
2782 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2902 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2783 Variable *THi = nullptr; | |
2784 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex); | 2903 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex); |
2904 Variable *THi = makeReg(SrcFHi->getType()); | |
2785 _mov(THi, SrcFHi); | 2905 _mov(THi, SrcFHi); |
2786 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); | 2906 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); |
2787 _mov_nonkillable(THi, SrcTHi, Cond); | 2907 _mov_nonkillable(THi, SrcTHi, Cond); |
2788 _mov(DestHi, THi); | 2908 _mov(DestHi, THi); |
2789 return; | 2909 return; |
2790 } | 2910 } |
2791 | 2911 |
2792 if (isFloatingType(DestTy)) { | 2912 if (isFloatingType(DestTy)) { |
2793 Variable *T = makeReg(DestTy); | 2913 Variable *T = makeReg(DestTy); |
2794 SrcF = legalizeToReg(SrcF); | 2914 SrcF = legalizeToReg(SrcF); |
2795 assert(DestTy == SrcF->getType()); | 2915 assert(DestTy == SrcF->getType()); |
2796 _vmov(T, SrcF); | 2916 _mov(T, SrcF); |
2797 SrcT = legalizeToReg(SrcT); | 2917 SrcT = legalizeToReg(SrcT); |
2798 assert(DestTy == SrcT->getType()); | 2918 assert(DestTy == SrcT->getType()); |
2799 _vmov(T, SrcT, Cond); | 2919 _mov(T, SrcT, Cond); |
2800 _set_dest_nonkillable(); | 2920 _set_dest_nonkillable(); |
2801 _vmov(Dest, T); | 2921 _mov(Dest, T); |
2802 return; | 2922 return; |
2803 } | 2923 } |
2804 | 2924 |
2805 Variable *T = nullptr; | |
2806 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); | 2925 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); |
2926 Variable *T = makeReg(SrcF->getType()); | |
2807 _mov(T, SrcF); | 2927 _mov(T, SrcF); |
2808 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex); | 2928 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex); |
2809 _mov_nonkillable(T, SrcT, Cond); | 2929 _mov_nonkillable(T, SrcT, Cond); |
2810 _mov(Dest, T); | 2930 _mov(Dest, T); |
2811 } | 2931 } |
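A small sketch of the predication pattern used for scalar select above (illustrative, not the emitted code): move the false value unconditionally, then conditionally overwrite it with the true value when the condition is nonzero.

// Mirrors "cmp cond, #0; mov t, SrcF; mov.ne t, SrcT; mov dest, t".
static int selectModel(bool Condition, int SrcT, int SrcF) {
  int T = SrcF;  // mov t, SrcF
  if (Condition) // cmp cond, #0 ; the NE predicate
    T = SrcT;    // movne t, SrcT
  return T;      // mov dest, t
}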
2812 | 2932 |
2813 void TargetARM32::lowerStore(const InstStore *Inst) { | 2933 void TargetARM32::lowerStore(const InstStore *Inst) { |
2814 Operand *Value = Inst->getData(); | 2934 Operand *Value = Inst->getData(); |
2815 Operand *Addr = Inst->getAddr(); | 2935 Operand *Addr = Inst->getAddr(); |
2816 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); | 2936 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); |
2817 Type Ty = NewAddr->getType(); | 2937 Type Ty = NewAddr->getType(); |
2818 | 2938 |
2819 if (Ty == IceType_i64) { | 2939 if (Ty == IceType_i64) { |
2820 Value = legalizeUndef(Value); | 2940 Value = legalizeUndef(Value); |
2821 Variable *ValueHi = legalizeToReg(hiOperand(Value)); | 2941 Variable *ValueHi = legalizeToReg(hiOperand(Value)); |
2822 Variable *ValueLo = legalizeToReg(loOperand(Value)); | 2942 Variable *ValueLo = legalizeToReg(loOperand(Value)); |
2823 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr))); | 2943 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr))); |
2824 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr))); | 2944 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr))); |
2825 } else { | 2945 } else { |
2826 if (isVectorType(Ty)) { | |
2827 UnimplementedError(Func->getContext()->getFlags()); | |
2828 } | |
2829 Variable *ValueR = legalizeToReg(Value); | 2946 Variable *ValueR = legalizeToReg(Value); |
2830 _str(ValueR, NewAddr); | 2947 _str(ValueR, NewAddr); |
2831 } | 2948 } |
2832 } | 2949 } |
2833 | 2950 |
2834 void TargetARM32::doAddressOptStore() { | 2951 void TargetARM32::doAddressOptStore() { |
2835 UnimplementedError(Func->getContext()->getFlags()); | 2952 UnimplementedError(Func->getContext()->getFlags()); |
2836 } | 2953 } |
2837 | 2954 |
2838 void TargetARM32::lowerSwitch(const InstSwitch *Inst) { | 2955 void TargetARM32::lowerSwitch(const InstSwitch *Inst) { |
(...skipping 32 matching lines...)
2871 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) { | 2988 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
2872 _trap(); | 2989 _trap(); |
2873 } | 2990 } |
2874 | 2991 |
2875 void TargetARM32::prelowerPhis() { | 2992 void TargetARM32::prelowerPhis() { |
2876 PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func); | 2993 PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func); |
2877 } | 2994 } |
2878 | 2995 |
2879 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 2996 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
2880 Variable *Reg = makeReg(Ty, RegNum); | 2997 Variable *Reg = makeReg(Ty, RegNum); |
2998 Context.insert(InstFakeDef::create(Func, Reg)); | |
2881 UnimplementedError(Func->getContext()->getFlags()); | 2999 UnimplementedError(Func->getContext()->getFlags()); |
2882 return Reg; | 3000 return Reg; |
2883 } | 3001 } |
2884 | 3002 |
2885 // Helper for legalize() to emit the right code to lower an operand to a | 3003 // Helper for legalize() to emit the right code to lower an operand to a |
2886 // register of the appropriate type. | 3004 // register of the appropriate type. |
2887 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { | 3005 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { |
2888 Type Ty = Src->getType(); | 3006 Type Ty = Src->getType(); |
2889 Variable *Reg = makeReg(Ty, RegNum); | 3007 Variable *Reg = makeReg(Ty, RegNum); |
2890 if (isVectorType(Ty)) { | 3008 _mov(Reg, Src); |
2891 // TODO(jpp): Src must be a register, or an address with base register. | |
2892 _vmov(Reg, Src); | |
2893 } else if (isFloatingType(Ty)) { | |
2894 _vmov(Reg, Src); | |
2895 } else { | |
2896 // Mov's Src operand can really only be the flexible second operand type or | |
2897 // a register. Users should guarantee that. | |
2898 _mov(Reg, Src); | |
2899 } | |
2900 return Reg; | 3009 return Reg; |
2901 } | 3010 } |
2902 | 3011 |
2903 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, | 3012 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
2904 int32_t RegNum) { | 3013 int32_t RegNum) { |
2905 Type Ty = From->getType(); | 3014 Type Ty = From->getType(); |
2906 // Assert that a physical register is allowed. To date, all calls to | 3015 // Assert that a physical register is allowed. To date, all calls to |
2907 // legalize() allow a physical register. Legal_Flex converts registers to the | 3016 // legalize() allow a physical register. Legal_Flex converts registers to the |
2908 // right type OperandARM32FlexReg as needed. | 3017 // right type OperandARM32FlexReg as needed. |
2909 assert(Allowed & Legal_Reg); | 3018 assert(Allowed & Legal_Reg); |
2910 // Go through the various types of operands: OperandARM32Mem, | 3019 // Go through the various types of operands: OperandARM32Mem, |
2911 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if | 3020 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if |
2912 // the type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we | 3021 // the type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we |
2913 // can always copy to a register. | 3022 // can always copy to a register. |
2914 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { | 3023 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { |
3024 static const struct { | |
3025 bool CanHaveOffset; | |
3026 bool CanHaveIndex; | |
3027 } MemTraits[] = { | |
3028 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr) \ | |
3029 { (ubits) > 0, rraddr } \ | |
3030 , | |
3031 ICETYPEARM32_TABLE | |
3032 #undef X | |
3033 }; | |
2915 // Before doing anything with a Mem operand, we need to ensure that the | 3034 // Before doing anything with a Mem operand, we need to ensure that the |
2916 // Base and Index components are in physical registers. | 3035 // Base and Index components are in physical registers. |
2917 Variable *Base = Mem->getBase(); | 3036 Variable *Base = Mem->getBase(); |
2918 Variable *Index = Mem->getIndex(); | 3037 Variable *Index = Mem->getIndex(); |
3038 ConstantInteger32 *Offset = Mem->getOffset(); | |
3039 assert(Index == nullptr || Offset == nullptr); | |
2919 Variable *RegBase = nullptr; | 3040 Variable *RegBase = nullptr; |
2920 Variable *RegIndex = nullptr; | 3041 Variable *RegIndex = nullptr; |
2921 if (Base) { | 3042 if (Base) { |
2922 RegBase = legalizeToReg(Base); | 3043 RegBase = legalizeToReg(Base); |
2923 } | 3044 } |
2924 if (Index) { | 3045 if (Index) { |
2925 RegIndex = legalizeToReg(Index); | 3046 RegIndex = legalizeToReg(Index); |
3047 if (!MemTraits[Ty].CanHaveIndex) { | |
3048 Variable *T = makeReg(IceType_i32, getReservedTmpReg()); | |
3049 _add(T, RegBase, RegIndex); | |
3050 RegBase = T; | |
3051 RegIndex = nullptr; | |
3052 } | |
2926 } | 3053 } |
3054 if (Offset && Offset->getValue() != 0) { | |
3055 static constexpr bool SignExt = false; | |
3056 if (!MemTraits[Ty].CanHaveOffset || | |
3057 !OperandARM32Mem::canHoldOffset(Ty, SignExt, Offset->getValue())) { | |
3058 Variable *T = legalizeToReg(Offset, getReservedTmpReg()); | |
3059 _add(T, T, RegBase); | |
3060 RegBase = T; | |
3061 Offset = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)); | |
3062 } | |
3063 } | |
3064 | |
2927 // Create a new operand if there was a change. | 3065 // Create a new operand if there was a change. |
2928 if (Base != RegBase || Index != RegIndex) { | 3066 if (Base != RegBase || Index != RegIndex) { |
2929 // There is only a reg +/- reg or reg + imm form. | 3067 // There is only a reg +/- reg or reg + imm form. |
2930 // Figure out which to re-create. | 3068 // Figure out which to re-create. |
2931 if (Mem->isRegReg()) { | 3069 if (RegBase && RegIndex) { |
2932 Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex, | 3070 Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex, |
2933 Mem->getShiftOp(), Mem->getShiftAmt(), | 3071 Mem->getShiftOp(), Mem->getShiftAmt(), |
2934 Mem->getAddrMode()); | 3072 Mem->getAddrMode()); |
2935 } else { | 3073 } else { |
2936 Mem = OperandARM32Mem::create(Func, Ty, RegBase, Mem->getOffset(), | 3074 Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset, |
2937 Mem->getAddrMode()); | 3075 Mem->getAddrMode()); |
2938 } | 3076 } |
2939 } | 3077 } |
2940 if (!(Allowed & Legal_Mem)) { | 3078 if (Allowed & Legal_Mem) { |
3079 From = Mem; | |
3080 } else { | |
2941 Variable *Reg = makeReg(Ty, RegNum); | 3081 Variable *Reg = makeReg(Ty, RegNum); |
2942 if (isVectorType(Ty)) { | 3082 _ldr(Reg, Mem); |
2943 UnimplementedError(Func->getContext()->getFlags()); | |
2944 } else if (isFloatingType(Ty)) { | |
2945 _vldr(Reg, Mem); | |
2946 } else { | |
2947 _ldr(Reg, Mem); | |
2948 } | |
2949 From = Reg; | 3083 From = Reg; |
2950 } else { | |
2951 From = Mem; | |
2952 } | 3084 } |
2953 return From; | 3085 return From; |
2954 } | 3086 } |
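A hedged sketch of the addressing-mode range check that drives the new offset handling above. The 12-bit and 8-bit limits are the usual ARM32 encodings for plain ldr/str versus the halfword/doubleword forms; the helper name is made up and is not the OperandARM32Mem::canHoldOffset API.

#include <cstdint>

// Returns whether an immediate byte offset can be encoded directly in the
// load/store; otherwise it must be materialized into a register and added to
// the base, which is what the "_add(T, T, RegBase)" path above does.
static bool offsetFitsArm32Model(int32_t Offset, bool Is8BitImm) {
  const int32_t Limit = Is8BitImm ? (1 << 8) : (1 << 12); // 256 or 4096
  return Offset > -Limit && Offset < Limit; // ARM allows +/- immediates
}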
2955 | 3087 |
2956 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { | 3088 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { |
2957 if (!(Allowed & Legal_Flex)) { | 3089 if (!(Allowed & Legal_Flex)) { |
2958 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { | 3090 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { |
2959 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { | 3091 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { |
2960 From = FlexReg->getReg(); | 3092 From = FlexReg->getReg(); |
2961 // Fall through and let From be checked as a Variable below, where it | 3093 // Fall through and let From be checked as a Variable below, where it |
(...skipping 392 matching lines...)
3354 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 3486 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
3355 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 3487 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
3356 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 3488 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
3357 } | 3489 } |
3358 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 3490 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
3359 // However, for compatibility with current NaCl LLVM, don't claim that. | 3491 // However, for compatibility with current NaCl LLVM, don't claim that. |
3360 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 3492 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
3361 } | 3493 } |
3362 | 3494 |
3363 } // end of namespace Ice | 3495 } // end of namespace Ice |