Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(126)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1151663004: Subzero ARM: do lowerIcmp, lowerBr, and a bit of lowerCall. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: clang-format Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringARM32 class, which consists almost 10 // This file implements the TargetLoweringARM32 class, which consists almost
(...skipping 22 matching lines...) Expand all
33 namespace { 33 namespace {
34 void UnimplementedError(const ClFlags &Flags) { 34 void UnimplementedError(const ClFlags &Flags) {
35 if (!Flags.getSkipUnimplemented()) { 35 if (!Flags.getSkipUnimplemented()) {
36 // Use llvm_unreachable instead of report_fatal_error, which gives better 36 // Use llvm_unreachable instead of report_fatal_error, which gives better
37 // stack traces. 37 // stack traces.
38 llvm_unreachable("Not yet implemented"); 38 llvm_unreachable("Not yet implemented");
39 abort(); 39 abort();
40 } 40 }
41 } 41 }
42 42
43 // The following table summarizes the logic for lowering the icmp instruction
44 // for i32 and narrower types. Each icmp condition has a clear mapping to an
45 // ARM32 conditional move instruction.
46
47 const struct TableIcmp32_ {
48 CondARM32::Cond Mapping;
49 } TableIcmp32[] = {
50 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
51 { CondARM32::C_32 } \
52 ,
53 ICMPARM32_TABLE
54 #undef X
55 };
56 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
57
58 // The following table summarizes the logic for lowering the icmp instruction
59 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
60 // The operands may need to be swapped, and there is a slight difference
61 // for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
62 const struct TableIcmp64_ {
63 bool IsSigned;
64 bool Swapped;
65 CondARM32::Cond C1, C2;
66 } TableIcmp64[] = {
67 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
68 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \
69 ,
70 ICMPARM32_TABLE
71 #undef X
72 };
73 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
74
75 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
76 size_t Index = static_cast<size_t>(Cond);
77 assert(Index < TableIcmp32Size);
78 return TableIcmp32[Index].Mapping;
79 }
80
81 // In some cases, there are x-macros tables for both high-level and
82 // low-level instructions/operands that use the same enum key value.
83 // The tables are kept separate to maintain a proper separation
84 // between abstraction layers. There is a risk that the tables could
85 // get out of sync if enum values are reordered or if entries are
86 // added or deleted. The following dummy namespaces use
87 // static_asserts to ensure everything is kept in sync.
88
89 // Validate the enum values in ICMPARM32_TABLE.
90 namespace dummy1 {
91 // Define a temporary set of enum values based on low-level table
92 // entries.
93 enum _tmp_enum {
94 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
95 ICMPARM32_TABLE
96 #undef X
97 _num
98 };
99 // Define a set of constants based on high-level table entries.
100 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
101 ICEINSTICMP_TABLE
102 #undef X
103 // Define a set of constants based on low-level table entries, and
104 // ensure the table entry keys are consistent.
105 #define X(val, signed, swapped64, C_32, C1_64, C2_64) \
106 static const int _table2_##val = _tmp_##val; \
107 static_assert( \
108 _table1_##val == _table2_##val, \
109 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
110 ICMPARM32_TABLE
111 #undef X
112 // Repeat the static asserts with respect to the high-level table
113 // entries in case the high-level table has extra entries.
114 #define X(tag, str) \
115 static_assert( \
116 _table1_##tag == _table2_##tag, \
117 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
118 ICEINSTICMP_TABLE
119 #undef X
120 } // end of namespace dummy1
121
43 // The maximum number of arguments to pass in GPR registers. 122 // The maximum number of arguments to pass in GPR registers.
44 const uint32_t ARM32_MAX_GPR_ARG = 4; 123 const uint32_t ARM32_MAX_GPR_ARG = 4;
45 124
46 } // end of anonymous namespace 125 } // end of anonymous namespace
47 126
48 TargetARM32::TargetARM32(Cfg *Func) 127 TargetARM32::TargetARM32(Cfg *Func)
49 : TargetLowering(Func), UsesFramePointer(false) { 128 : TargetLowering(Func), UsesFramePointer(false) {
50 // TODO: Don't initialize IntegerRegisters and friends every time. 129 // TODO: Don't initialize IntegerRegisters and friends every time.
51 // Instead, initialize in some sort of static initializer for the 130 // Instead, initialize in some sort of static initializer for the
52 // class. 131 // class.
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after
211 return; 290 return;
212 Func->dump("After stack frame mapping"); 291 Func->dump("After stack frame mapping");
213 292
214 // Nop insertion 293 // Nop insertion
215 if (Ctx->getFlags().shouldDoNopInsertion()) { 294 if (Ctx->getFlags().shouldDoNopInsertion()) {
216 Func->doNopInsertion(); 295 Func->doNopInsertion();
217 } 296 }
218 } 297 }
219 298
220 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { 299 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
221 (void)I; 300 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
222 (void)NextNode; 301 return Br->optimizeBranch(NextNode);
223 UnimplementedError(Func->getContext()->getFlags()); 302 }
224 return false; 303 return false;
225 } 304 }
226 305
227 IceString TargetARM32::RegNames[] = { 306 IceString TargetARM32::RegNames[] = {
228 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ 307 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
229 isFP) \ 308 isFP) \
230 name, 309 name,
231 REGARM32_TABLE 310 REGARM32_TABLE
232 #undef X 311 #undef X
233 }; 312 };
(...skipping 509 matching lines...) Expand 10 before | Expand all | Expand 10 after
743 } 822 }
744 if (isVectorType(Dest->getType())) { 823 if (isVectorType(Dest->getType())) {
745 UnimplementedError(Func->getContext()->getFlags()); 824 UnimplementedError(Func->getContext()->getFlags());
746 } else { 825 } else {
747 _mov(Dest, SrcR); 826 _mov(Dest, SrcR);
748 } 827 }
749 } 828 }
750 } 829 }
751 830
752 void TargetARM32::lowerBr(const InstBr *Inst) { 831 void TargetARM32::lowerBr(const InstBr *Inst) {
753 (void)Inst; 832 if (Inst->isUnconditional()) {
754 UnimplementedError(Func->getContext()->getFlags()); 833 _br(Inst->getTargetUnconditional());
834 return;
835 }
836 Operand *Cond = Inst->getCondition();
837 // TODO(jvoung): Handle folding opportunities.
838
839 Variable *Src0R = legalizeToVar(Cond);
840 Constant *Zero = Ctx->getConstantZero(IceType_i32);
841 _cmp(Src0R, Zero);
842 _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
755 } 843 }
756 844
757 void TargetARM32::lowerCall(const InstCall *Inst) { 845 void TargetARM32::lowerCall(const InstCall *Instr) {
758 (void)Inst; 846 // TODO(jvoung): assign arguments to registers and stack. Also reserve stack.
759 UnimplementedError(Func->getContext()->getFlags()); 847 if (Instr->getNumArgs()) {
848 UnimplementedError(Func->getContext()->getFlags());
849 }
850
851 // Generate the call instruction. Assign its result to a temporary
852 // with high register allocation weight.
853 Variable *Dest = Instr->getDest();
854 // ReturnReg doubles as ReturnRegLo as necessary.
855 Variable *ReturnReg = nullptr;
856 Variable *ReturnRegHi = nullptr;
857 if (Dest) {
858 switch (Dest->getType()) {
859 case IceType_NUM:
860 llvm_unreachable("Invalid Call dest type");
861 break;
862 case IceType_void:
863 break;
864 case IceType_i1:
865 case IceType_i8:
866 case IceType_i16:
867 case IceType_i32:
868 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
869 break;
870 case IceType_i64:
871 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
872 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
873 break;
874 case IceType_f32:
875 case IceType_f64:
876 // Use S and D regs.
877 UnimplementedError(Func->getContext()->getFlags());
878 break;
879 case IceType_v4i1:
880 case IceType_v8i1:
881 case IceType_v16i1:
882 case IceType_v16i8:
883 case IceType_v8i16:
884 case IceType_v4i32:
885 case IceType_v4f32:
886 // Use Q regs.
887 UnimplementedError(Func->getContext()->getFlags());
888 break;
889 }
890 }
891 Operand *CallTarget = Instr->getCallTarget();
892 // Allow ConstantRelocatable to be left alone as a direct call,
893 // but force other constants like ConstantInteger32 to be in
894 // a register and make it an indirect call.
895 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
896 CallTarget = legalize(CallTarget, Legal_Reg);
897 }
898 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
899 Context.insert(NewCall);
900 if (ReturnRegHi)
901 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
902
903 // Insert a register-kill pseudo instruction.
904 Context.insert(InstFakeKill::create(Func, NewCall));
905
906 // Generate a FakeUse to keep the call live if necessary.
907 if (Instr->hasSideEffects() && ReturnReg) {
908 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
909 Context.insert(FakeUse);
910 }
911
912 if (!Dest)
913 return;
914
915 // Assign the result of the call to Dest.
916 if (ReturnReg) {
917 if (ReturnRegHi) {
918 assert(Dest->getType() == IceType_i64);
919 split64(Dest);
920 Variable *DestLo = Dest->getLo();
921 Variable *DestHi = Dest->getHi();
922 _mov(DestLo, ReturnReg);
923 _mov(DestHi, ReturnRegHi);
924 } else {
925 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
926 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
927 isVectorType(Dest->getType()));
928 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
929 UnimplementedError(Func->getContext()->getFlags());
930 } else {
931 _mov(Dest, ReturnReg);
932 }
933 }
934 }
760 } 935 }
761 936
762 void TargetARM32::lowerCast(const InstCast *Inst) { 937 void TargetARM32::lowerCast(const InstCast *Inst) {
763 InstCast::OpKind CastKind = Inst->getCastKind(); 938 InstCast::OpKind CastKind = Inst->getCastKind();
764 switch (CastKind) { 939 switch (CastKind) {
765 default: 940 default:
766 Func->setError("Cast type not supported"); 941 Func->setError("Cast type not supported");
767 return; 942 return;
768 case InstCast::Sext: { 943 case InstCast::Sext: {
769 UnimplementedError(Func->getContext()->getFlags()); 944 UnimplementedError(Func->getContext()->getFlags());
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
808 (void)Inst; 983 (void)Inst;
809 UnimplementedError(Func->getContext()->getFlags()); 984 UnimplementedError(Func->getContext()->getFlags());
810 } 985 }
811 986
812 void TargetARM32::lowerFcmp(const InstFcmp *Inst) { 987 void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
813 (void)Inst; 988 (void)Inst;
814 UnimplementedError(Func->getContext()->getFlags()); 989 UnimplementedError(Func->getContext()->getFlags());
815 } 990 }
816 991
817 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { 992 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
818 (void)Inst; 993 Variable *Dest = Inst->getDest();
819 UnimplementedError(Func->getContext()->getFlags()); 994 Operand *Src0 = Inst->getSrc(0);
995 Operand *Src1 = Inst->getSrc(1);
996
997 if (isVectorType(Dest->getType())) {
998 UnimplementedError(Func->getContext()->getFlags());
999 return;
1000 }
1001
1002 // a=icmp cond, b, c ==>
1003 // GCC does:
1004 // cmp b.hi, c.hi or cmp b.lo, c.lo
1005 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
1006 // mov.<C1> t, #1 mov.<C1> t, #1
1007 // mov.<C2> t, #0 mov.<C2> t, #0
1008 // mov a, t mov a, t
1009 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
1010 // is used for signed compares. In some cases, b and c need to be swapped
1011 // as well.
1012 //
1013 // LLVM does:
1014 // for EQ and NE:
1015 // eor t1, b.hi, c.hi
1016 // eor t2, b.lo, c.lo
1017 // orrs t, t1, t2
1018 // mov.<C> t, #1
1019 // mov a, t
1020 //
1021 // that's nice in that it's just as short but has fewer dependencies
1022 // for better ILP at the cost of more registers.
1023 //
1024 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
1025 // two unconditional mov #0, two cmps, two conditional mov #1,
1026 // and one conditional reg mov. That has few dependencies for good ILP,
1027 // but is a longer sequence.
1028 //
1029 // So, we are going with the GCC version since it's usually better (except
1030 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
1031 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1032 Constant *One = Ctx->getConstantInt32(1);
1033 if (Src0->getType() == IceType_i64) {
1034 InstIcmp::ICond Condition = Inst->getCondition();
1035 size_t Index = static_cast<size_t>(Condition);
1036 assert(Index < TableIcmp64Size);
1037 Variable *Src0Lo, *Src0Hi;
1038 Operand *Src1LoRF, *Src1HiRF;
1039 if (TableIcmp64[Index].Swapped) {
1040 Src0Lo = legalizeToVar(loOperand(Src1));
1041 Src0Hi = legalizeToVar(hiOperand(Src1));
1042 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1043 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1044 } else {
1045 Src0Lo = legalizeToVar(loOperand(Src0));
1046 Src0Hi = legalizeToVar(hiOperand(Src0));
1047 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1048 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1049 }
1050 Variable *T = makeReg(IceType_i32);
1051 if (TableIcmp64[Index].IsSigned) {
1052 Variable *ScratchReg = makeReg(IceType_i32);
1053 _cmp(Src0Lo, Src1LoRF);
1054 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
1055 } else {
1056 _cmp(Src0Hi, Src1HiRF);
1057 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
1058 }
1059 _mov(T, One, TableIcmp64[Index].C1);
1060 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
1061 _mov(Dest, T);
1062 return;
1063 }
1064
1065 // a=icmp cond b, c ==>
1066 // GCC does:
1067 // <u/s>xtb tb, b
1068 // <u/s>xtb tc, c
1069 // cmp tb, tc
1070 // mov.C1 t, #0
1071 // mov.C2 t, #1
1072 // mov a, t
1073 // where the unsigned/sign extension is not needed for 32-bit.
1074 // They also have special cases for EQ and NE. E.g., for NE:
1075 // <extend to tb, tc>
1076 // subs t, tb, tc
1077 // movne t, #1
1078 // mov a, t
1079 //
1080 // LLVM does:
1081 // lsl tb, b, #<N>
1082 // mov t, #0
1083 // cmp tb, c, lsl #<N>
1084 // mov.<C> t, #1
1085 // mov a, t
1086 //
1087 // the left shift is by 0, 16, or 24, which allows the comparison to focus
1088 // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned).
1089 // For the unsigned case, for some reason it does similar to GCC and does
1090 // a uxtb first. It's not clear to me why that special-casing is needed.
jvoung (off chromium) 2015/05/21 22:52:03 I omitted the uxtb -- haven't run any cross tests
1091 //
1092 // We'll go with the LLVM way for now, since it's shorter and has just as
1093 // few dependencies.
1094 int32_t ShiftAmount = 32 - getScalarIntBitWidth(Src0->getType());
1095 assert(ShiftAmount >= 0);
1096 Constant *ShiftConst = nullptr;
1097 Variable *Src0R = nullptr;
1098 Variable *T = makeReg(IceType_i32);
1099 if (ShiftAmount) {
1100 ShiftConst = Ctx->getConstantInt32(ShiftAmount);
1101 Src0R = makeReg(IceType_i32);
1102 _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
1103 } else {
1104 Src0R = legalizeToVar(Src0);
1105 }
1106 _mov(T, Zero);
1107 if (ShiftAmount) {
1108 Variable *Src1R = legalizeToVar(Src1);
1109 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
1110 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
1111 _cmp(Src0R, Src1RShifted);
1112 } else {
1113 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
1114 _cmp(Src0R, Src1RF);
1115 }
1116 _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
1117 _mov(Dest, T);
1118 return;
820 } 1119 }
821 1120
822 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { 1121 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
823 (void)Inst; 1122 (void)Inst;
824 UnimplementedError(Func->getContext()->getFlags()); 1123 UnimplementedError(Func->getContext()->getFlags());
825 } 1124 }
826 1125
827 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 1126 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
828 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { 1127 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
829 case Intrinsics::AtomicCmpxchg: { 1128 case Intrinsics::AtomicCmpxchg: {
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
979 Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0); 1278 Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0);
980 Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1); 1279 Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1);
981 Reg = R0; 1280 Reg = R0;
982 Context.insert(InstFakeUse::create(Func, R1)); 1281 Context.insert(InstFakeUse::create(Func, R1));
983 } else if (isScalarFloatingType(Src0->getType())) { 1282 } else if (isScalarFloatingType(Src0->getType())) {
984 UnimplementedError(Func->getContext()->getFlags()); 1283 UnimplementedError(Func->getContext()->getFlags());
985 } else if (isVectorType(Src0->getType())) { 1284 } else if (isVectorType(Src0->getType())) {
986 UnimplementedError(Func->getContext()->getFlags()); 1285 UnimplementedError(Func->getContext()->getFlags());
987 } else { 1286 } else {
988 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); 1287 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
989 _mov(Reg, Src0F, RegARM32::Reg_r0); 1288 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
990 } 1289 }
991 } 1290 }
992 // Add a ret instruction even if sandboxing is enabled, because 1291 // Add a ret instruction even if sandboxing is enabled, because
993 // addEpilog explicitly looks for a ret instruction as a marker for 1292 // addEpilog explicitly looks for a ret instruction as a marker for
994 // where to insert the frame removal instructions. 1293 // where to insert the frame removal instructions.
995 // addEpilog is responsible for restoring the "lr" register as needed 1294 // addEpilog is responsible for restoring the "lr" register as needed
996 // prior to this ret instruction. 1295 // prior to this ret instruction.
997 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); 1296 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
998 // Add a fake use of sp to make sure sp stays alive for the entire 1297 // Add a fake use of sp to make sure sp stays alive for the entire
999 // function. Otherwise post-call sp adjustments get dead-code 1298 // function. Otherwise post-call sp adjustments get dead-code
(...skipping 292 matching lines...) Expand 10 before | Expand all | Expand 10 after
1292 } 1591 }
1293 } 1592 }
1294 1593
1295 void TargetDataARM32::lowerConstants() const { 1594 void TargetDataARM32::lowerConstants() const {
1296 if (Ctx->getFlags().getDisableTranslation()) 1595 if (Ctx->getFlags().getDisableTranslation())
1297 return; 1596 return;
1298 UnimplementedError(Ctx->getFlags()); 1597 UnimplementedError(Ctx->getFlags());
1299 } 1598 }
1300 1599
1301 } // end of namespace Ice 1600 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698