Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 858 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 869 } | 869 } |
| 870 | 870 |
| 871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { | 871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { |
| 872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
| 873 return false; | 873 return false; |
| 874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; | 874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; |
| 875 ++NumGPRRegsUsed; | 875 ++NumGPRRegsUsed; |
| 876 return true; | 876 return true; |
| 877 } | 877 } |
| 878 | 878 |
| 879 #define SANITY_CHECK_QS(_0, _1) \ | |
| 880 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \ | |
| 881 "ARM32 " #_0 " and " #_1 " registers are declared " \ | |
| 882 "incorrectly.") | |
| 883 SANITY_CHECK_QS(q0, q1); | |
| 884 SANITY_CHECK_QS(q1, q2); | |
| 885 SANITY_CHECK_QS(q2, q3); | |
| 886 SANITY_CHECK_QS(q3, q4); | |
| 887 SANITY_CHECK_QS(q4, q5); | |
| 888 SANITY_CHECK_QS(q5, q6); | |
| 889 SANITY_CHECK_QS(q6, q7); | |
| 890 SANITY_CHECK_QS(q7, q8); | |
| 891 SANITY_CHECK_QS(q8, q9); | |
| 892 SANITY_CHECK_QS(q9, q10); | |
| 893 SANITY_CHECK_QS(q10, q11); | |
| 894 SANITY_CHECK_QS(q11, q12); | |
| 895 SANITY_CHECK_QS(q12, q13); | |
| 896 SANITY_CHECK_QS(q13, q14); | |
| 897 SANITY_CHECK_QS(q14, q15); | |
| 898 #undef SANITY_CHECK_QS | |
| 899 #define SANITY_CHECK_DS(_0, _1) \ | |
| 900 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \ | |
| 901 "ARM32 " #_0 " and " #_1 " registers are declared " \ | |
| 902 "incorrectly.") | |
| 903 SANITY_CHECK_DS(d0, d1); | |
| 904 SANITY_CHECK_DS(d1, d2); | |
| 905 SANITY_CHECK_DS(d2, d3); | |
| 906 SANITY_CHECK_DS(d3, d4); | |
| 907 SANITY_CHECK_DS(d4, d5); | |
| 908 SANITY_CHECK_DS(d5, d6); | |
| 909 SANITY_CHECK_DS(d6, d7); | |
| 910 SANITY_CHECK_DS(d7, d8); | |
| 911 SANITY_CHECK_DS(d8, d9); | |
| 912 SANITY_CHECK_DS(d9, d10); | |
| 913 SANITY_CHECK_DS(d10, d11); | |
| 914 SANITY_CHECK_DS(d11, d12); | |
| 915 SANITY_CHECK_DS(d12, d13); | |
| 916 SANITY_CHECK_DS(d13, d14); | |
| 917 SANITY_CHECK_DS(d14, d15); | |
| 918 SANITY_CHECK_DS(d15, d16); | |
| 919 SANITY_CHECK_DS(d16, d17); | |
| 920 SANITY_CHECK_DS(d17, d18); | |
| 921 SANITY_CHECK_DS(d18, d19); | |
| 922 SANITY_CHECK_DS(d19, d20); | |
| 923 SANITY_CHECK_DS(d20, d21); | |
| 924 SANITY_CHECK_DS(d21, d22); | |
| 925 SANITY_CHECK_DS(d22, d23); | |
| 926 SANITY_CHECK_DS(d23, d24); | |
| 927 SANITY_CHECK_DS(d24, d25); | |
| 928 SANITY_CHECK_DS(d25, d26); | |
| 929 SANITY_CHECK_DS(d26, d27); | |
| 930 SANITY_CHECK_DS(d27, d28); | |
| 931 SANITY_CHECK_DS(d28, d29); | |
| 932 SANITY_CHECK_DS(d29, d30); | |
| 933 SANITY_CHECK_DS(d30, d31); | |
| 934 #undef SANITY_CHECK_DS | |
| 935 #define SANITY_CHECK_SS(_0, _1) \ | |
| 936 static_assert((RegARM32::Reg_##_0 + 1) == RegARM32::Reg_##_1, \ | |
| 937 "ARM32 " #_0 " and " #_1 " registers are declared " \ | |
| 938 "incorrectly.") | |
| 939 SANITY_CHECK_SS(s0, s1); | |
| 940 SANITY_CHECK_SS(s1, s2); | |
| 941 SANITY_CHECK_SS(s2, s3); | |
| 942 SANITY_CHECK_SS(s3, s4); | |
| 943 SANITY_CHECK_SS(s4, s5); | |
| 944 SANITY_CHECK_SS(s5, s6); | |
| 945 SANITY_CHECK_SS(s6, s7); | |
| 946 SANITY_CHECK_SS(s7, s8); | |
| 947 SANITY_CHECK_SS(s8, s9); | |
| 948 SANITY_CHECK_SS(s9, s10); | |
| 949 SANITY_CHECK_SS(s10, s11); | |
| 950 SANITY_CHECK_SS(s11, s12); | |
| 951 SANITY_CHECK_SS(s12, s13); | |
| 952 SANITY_CHECK_SS(s13, s14); | |
| 953 SANITY_CHECK_SS(s14, s15); | |
| 954 SANITY_CHECK_SS(s15, s16); | |
| 955 SANITY_CHECK_SS(s16, s17); | |
| 956 SANITY_CHECK_SS(s17, s18); | |
| 957 SANITY_CHECK_SS(s18, s19); | |
| 958 SANITY_CHECK_SS(s19, s20); | |
| 959 SANITY_CHECK_SS(s20, s21); | |
| 960 SANITY_CHECK_SS(s21, s22); | |
| 961 SANITY_CHECK_SS(s22, s23); | |
| 962 SANITY_CHECK_SS(s23, s24); | |
| 963 SANITY_CHECK_SS(s24, s25); | |
| 964 SANITY_CHECK_SS(s25, s26); | |
| 965 SANITY_CHECK_SS(s26, s27); | |
| 966 SANITY_CHECK_SS(s27, s28); | |
| 967 SANITY_CHECK_SS(s28, s29); | |
| 968 SANITY_CHECK_SS(s29, s30); | |
| 969 SANITY_CHECK_SS(s30, s31); | |
| 970 #undef SANITY_CHECK_SS | |
| 971 | |
| 879 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { | 972 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { |
| 880 if (!VFPRegsFree.any()) { | 973 if (!VFPRegsFree.any()) { |
| 881 return false; | 974 return false; |
| 882 } | 975 } |
| 883 | 976 |
| 884 if (isVectorType(Ty)) { | 977 if (isVectorType(Ty)) { |
| 885 // Q registers are declared in reverse order, so RegARM32::Reg_q0 > | 978 // Q registers are declared in reverse order, so RegARM32::Reg_q0 > |
| 886 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0. | 979 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0. |
| 887 // Same thing goes for D registers. | 980 // Same thing goes for D registers. |
| 888 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, | |
| 889 "ARM32 Q registers are possibly declared incorrectly."); | |
| 890 | 981 |
| 891 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first(); | 982 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first(); |
| 892 if (QRegStart >= 0) { | 983 if (QRegStart >= 0) { |
| 893 VFPRegsFree.reset(QRegStart, QRegStart + 4); | 984 VFPRegsFree.reset(QRegStart, QRegStart + 4); |
| 894 *Reg = RegARM32::Reg_q0 - (QRegStart / 4); | 985 *Reg = RegARM32::Reg_q0 - (QRegStart / 4); |
| 895 return true; | 986 return true; |
| 896 } | 987 } |
| 897 } else if (Ty == IceType_f64) { | 988 } else if (Ty == IceType_f64) { |
| 898 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1, | |
| 899 "ARM32 D registers are possibly declared incorrectly."); | |
| 900 | |
| 901 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first(); | 989 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first(); |
| 902 if (DRegStart >= 0) { | 990 if (DRegStart >= 0) { |
| 903 VFPRegsFree.reset(DRegStart, DRegStart + 2); | 991 VFPRegsFree.reset(DRegStart, DRegStart + 2); |
| 904 *Reg = RegARM32::Reg_d0 - (DRegStart / 2); | 992 *Reg = RegARM32::Reg_d0 - (DRegStart / 2); |
| 905 return true; | 993 return true; |
| 906 } | 994 } |
| 907 } else { | 995 } else { |
| 908 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1, | |
| 909 "ARM32 S registers are possibly declared incorrectly."); | |
| 910 | |
| 911 assert(Ty == IceType_f32); | 996 assert(Ty == IceType_f32); |
| 912 int32_t SReg = VFPRegsFree.find_first(); | 997 int32_t SReg = VFPRegsFree.find_first(); |
| 913 assert(SReg >= 0); | 998 assert(SReg >= 0); |
| 914 VFPRegsFree.reset(SReg); | 999 VFPRegsFree.reset(SReg); |
| 915 *Reg = RegARM32::Reg_s0 + SReg; | 1000 *Reg = RegARM32::Reg_s0 + SReg; |
| 916 return true; | 1001 return true; |
| 917 } | 1002 } |
| 918 | 1003 |
| 919 // Parameter allocation failed. From now on, every fp register must be placed | 1004 // Parameter allocation failed. From now on, every fp register must be placed |
| 920 // on the stack. We clear VFRegsFree in case there are any "holes" from S and | 1005 // on the stack. We clear VFRegsFree in case there are any "holes" from S and |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1089 | 1174 |
| 1090 // Compute the list of spilled variables and bounds for GlobalsSize, etc. | 1175 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
| 1091 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | 1176 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
| 1092 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | 1177 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
| 1093 &LocalsSlotsAlignmentBytes, TargetVarHook); | 1178 &LocalsSlotsAlignmentBytes, TargetVarHook); |
| 1094 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | 1179 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| 1095 SpillAreaSizeBytes += GlobalsSize; | 1180 SpillAreaSizeBytes += GlobalsSize; |
| 1096 | 1181 |
| 1097 // Add push instructions for preserved registers. On ARM, "push" can push a | 1182 // Add push instructions for preserved registers. On ARM, "push" can push a |
| 1098 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has | 1183 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has |
| 1099 // callee-saved float/vector registers. The "vpush" instruction can handle a | 1184 // callee-saved float/vector registers. |
|
Jim Stichnoth
2015/12/07 20:58:14
At first glance, this looks like a reflow error.
John
2015/12/08 13:54:25
Done.
| |
| 1100 // whole list of float/vector registers, but it only handles contiguous | 1185 // The "vpush" instruction can handle a whole list of float/vector registers, |
| 1101 // sequences of registers by specifying the start and the length. | 1186 // but it only handles contiguous sequences of registers by specifying the |
| 1102 VarList GPRsToPreserve; | 1187 // start and the length. |
| 1103 GPRsToPreserve.reserve(CalleeSaves.size()); | 1188 PreservedGPRs.reserve(CalleeSaves.size()); |
| 1104 uint32_t NumCallee = 0; | 1189 PreservedSRegs.reserve(CalleeSaves.size()); |
| 1105 size_t PreservedRegsSizeBytes = 0; | 1190 |
| 1106 // Consider FP and LR as callee-save / used as needed. | 1191 // Consider FP and LR as callee-save / used as needed. |
| 1107 if (UsesFramePointer) { | 1192 if (UsesFramePointer) { |
| 1193 if (RegsUsed[RegARM32::Reg_fp]) { | |
| 1194 llvm::report_fatal_error("Frame pointer has been used."); | |
| 1195 } | |
| 1108 CalleeSaves[RegARM32::Reg_fp] = true; | 1196 CalleeSaves[RegARM32::Reg_fp] = true; |
| 1109 assert(RegsUsed[RegARM32::Reg_fp] == false); | |
| 1110 RegsUsed[RegARM32::Reg_fp] = true; | 1197 RegsUsed[RegARM32::Reg_fp] = true; |
| 1111 } | 1198 } |
| 1112 if (!MaybeLeafFunc) { | 1199 if (!MaybeLeafFunc) { |
| 1113 CalleeSaves[RegARM32::Reg_lr] = true; | 1200 CalleeSaves[RegARM32::Reg_lr] = true; |
| 1114 RegsUsed[RegARM32::Reg_lr] = true; | 1201 RegsUsed[RegARM32::Reg_lr] = true; |
| 1115 } | 1202 } |
| 1203 | |
| 1204 // Perform a two-pass over the used registers. The first pass records all the | |
|
Jim Stichnoth
2015/12/07 20:58:14
"Make two passes over the used registers."
John
2015/12/08 13:54:25
Done.
| |
| 1205 // used registers -- and their aliases. Then, we figure out which GPRs and | |
| 1206 // VFP S registers should be saved. We don't bother saving D/Q registers | |
| 1207 // because their uses are recorded as S regs uses. | |
| 1208 llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM); | |
| 1116 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 1209 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 1117 if (RegARM32::isI64RegisterPair(i)) { | 1210 if (NeedSandboxing && i == RegARM32::Reg_r9) { |
| 1118 // We don't save register pairs explicitly. Instead, we rely on the code | 1211 // r9 is never updated in sandboxed code. |
| 1119 // fake-defing/fake-using each register in the pair. | |
| 1120 continue; | 1212 continue; |
| 1121 } | 1213 } |
| 1122 if (CalleeSaves[i] && RegsUsed[i]) { | 1214 if (CalleeSaves[i] && RegsUsed[i]) { |
| 1123 if (NeedSandboxing && i == RegARM32::Reg_r9) { | 1215 ToPreserve |= RegisterAliases[i]; |
| 1124 // r9 is never updated in sandboxed code. | 1216 } |
| 1217 } | |
| 1218 | |
| 1219 uint32_t NumCallee = 0; | |
| 1220 size_t PreservedRegsSizeBytes = 0; | |
| 1221 | |
| 1222 // RegClasses is a tuple of | |
| 1223 // | |
| 1224 // <First Register in Class, Last Register in Class, Vector of Save Registers> | |
| 1225 // | |
| 1226 // We use this tuple to figure out which register we should push/pop during | |
| 1227 // prolog/epilog. | |
| 1228 const std::tuple<uint32_t, uint32_t, VarList *> RegClasses[] = { | |
| 1229 {RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last, &PreservedGPRs}, | |
| 1230 {RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last, &PreservedSRegs}}; | |
| 1231 for (const auto &RegClass : RegClasses) { | |
| 1232 const uint32_t FirstRegInClass = std::get<0>(RegClass); | |
| 1233 const uint32_t LastRegInClass = std::get<1>(RegClass); | |
| 1234 VarList *const PreservedRegsInClass = std::get<2>(RegClass); | |
| 1235 for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) { | |
| 1236 if (!ToPreserve[Reg]) { | |
| 1125 continue; | 1237 continue; |
| 1126 } | 1238 } |
| 1127 ++NumCallee; | 1239 ++NumCallee; |
| 1128 Variable *PhysicalRegister = getPhysicalRegister(i); | 1240 Variable *PhysicalRegister = getPhysicalRegister(Reg); |
| 1129 PreservedRegsSizeBytes += | 1241 PreservedRegsSizeBytes += |
| 1130 typeWidthInBytesOnStack(PhysicalRegister->getType()); | 1242 typeWidthInBytesOnStack(PhysicalRegister->getType()); |
| 1131 GPRsToPreserve.push_back(getPhysicalRegister(i)); | 1243 PreservedRegsInClass->push_back(PhysicalRegister); |
| 1132 } | 1244 } |
| 1133 } | 1245 } |
| 1246 | |
| 1134 Ctx->statsUpdateRegistersSaved(NumCallee); | 1247 Ctx->statsUpdateRegistersSaved(NumCallee); |
| 1135 if (!GPRsToPreserve.empty()) | 1248 if (!PreservedSRegs.empty()) |
| 1136 _push(GPRsToPreserve); | 1249 _push(PreservedSRegs); |
| 1250 if (!PreservedGPRs.empty()) | |
| 1251 _push(PreservedGPRs); | |
| 1137 | 1252 |
| 1138 // Generate "mov FP, SP" if needed. | 1253 // Generate "mov FP, SP" if needed. |
| 1139 if (UsesFramePointer) { | 1254 if (UsesFramePointer) { |
| 1140 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 1255 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
| 1141 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 1256 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| 1142 _mov(FP, SP); | 1257 _mov(FP, SP); |
| 1143 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). | 1258 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
| 1144 Context.insert(InstFakeUse::create(Func, FP)); | 1259 Context.insert(InstFakeUse::create(Func, FP)); |
| 1145 } | 1260 } |
| 1146 | 1261 |
| 1147 // Align the variables area. SpillAreaPaddingBytes is the size of the region | 1262 // Align the variables area. SpillAreaPaddingBytes is the size of the region |
| 1148 // after the preserved registers and before the spill areas. | 1263 // after the preserved registers and before the spill areas. |
| 1149 // LocalsSlotsPaddingBytes is the amount of padding between the globals and | 1264 // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
| 1150 // locals area if they are separate. | 1265 // locals area if they are separate. |
| 1151 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); | 1266 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); |
| 1152 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 1267 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| 1153 uint32_t SpillAreaPaddingBytes = 0; | 1268 uint32_t SpillAreaPaddingBytes = 0; |
| 1154 uint32_t LocalsSlotsPaddingBytes = 0; | 1269 uint32_t LocalsSlotsPaddingBytes = 0; |
| 1155 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, | 1270 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, |
| 1156 GlobalsSize, LocalsSlotsAlignmentBytes, | 1271 GlobalsSize, LocalsSlotsAlignmentBytes, |
| 1157 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); | 1272 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); |
| 1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 1273 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| 1159 uint32_t GlobalsAndSubsequentPaddingSize = | 1274 uint32_t GlobalsAndSubsequentPaddingSize = |
| 1160 GlobalsSize + LocalsSlotsPaddingBytes; | 1275 GlobalsSize + LocalsSlotsPaddingBytes; |
| 1161 | 1276 |
| 1162 // Adds the out args space to the stack, and align SP if necessary. | 1277 // Adds the out args space to the stack, and align SP if necessary. |
| 1163 if (NeedsStackAlignment) { | 1278 if (!NeedsStackAlignment) { |
| 1279 SpillAreaSizeBytes += MaxOutArgsSizeBytes; | |
| 1280 } else { | |
| 1164 uint32_t StackOffset = PreservedRegsSizeBytes; | 1281 uint32_t StackOffset = PreservedRegsSizeBytes; |
| 1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 1282 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 1166 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); | 1283 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); |
| 1167 SpillAreaSizeBytes = StackSize - StackOffset; | 1284 SpillAreaSizeBytes = StackSize - StackOffset; |
| 1168 } else { | |
| 1169 SpillAreaSizeBytes += MaxOutArgsSizeBytes; | |
| 1170 } | 1285 } |
| 1171 | 1286 |
| 1172 // Combine fixed alloca with SpillAreaSize. | 1287 // Combine fixed alloca with SpillAreaSize. |
| 1173 SpillAreaSizeBytes += FixedAllocaSizeBytes; | 1288 SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| 1174 | 1289 |
| 1175 // Generate "sub sp, SpillAreaSizeBytes" | 1290 // Generate "sub sp, SpillAreaSizeBytes" |
| 1176 if (SpillAreaSizeBytes) { | 1291 if (SpillAreaSizeBytes) { |
| 1177 // Use the scratch register if needed to legalize the immediate. | 1292 // Use the scratch register if needed to legalize the immediate. |
| 1178 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 1293 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| 1179 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 1294 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1278 // add SP, SpillAreaSizeBytes | 1393 // add SP, SpillAreaSizeBytes |
| 1279 if (SpillAreaSizeBytes) { | 1394 if (SpillAreaSizeBytes) { |
| 1280 // Use the scratch register if needed to legalize the immediate. | 1395 // Use the scratch register if needed to legalize the immediate. |
| 1281 Operand *AddAmount = | 1396 Operand *AddAmount = |
| 1282 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 1397 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| 1283 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 1398 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| 1284 Sandboxer(this).add_sp(AddAmount); | 1399 Sandboxer(this).add_sp(AddAmount); |
| 1285 } | 1400 } |
| 1286 } | 1401 } |
| 1287 | 1402 |
| 1288 // Add pop instructions for preserved registers. | 1403 if (!PreservedGPRs.empty()) |
| 1289 llvm::SmallBitVector CalleeSaves = | 1404 _pop(PreservedGPRs); |
| 1290 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 1405 if (!PreservedSRegs.empty()) |
| 1291 VarList GPRsToRestore; | 1406 _pop(PreservedSRegs); |
| 1292 GPRsToRestore.reserve(CalleeSaves.size()); | |
| 1293 // Consider FP and LR as callee-save / used as needed. | |
| 1294 if (UsesFramePointer) { | |
| 1295 CalleeSaves[RegARM32::Reg_fp] = true; | |
| 1296 } | |
| 1297 if (!MaybeLeafFunc) { | |
| 1298 CalleeSaves[RegARM32::Reg_lr] = true; | |
| 1299 } | |
| 1300 // Pop registers in ascending order just like push (instead of in reverse | |
| 1301 // order). | |
| 1302 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | |
| 1303 if (RegARM32::isI64RegisterPair(i)) { | |
| 1304 continue; | |
| 1305 } | |
| 1306 | |
| 1307 if (CalleeSaves[i] && RegsUsed[i]) { | |
| 1308 if (NeedSandboxing && i == RegARM32::Reg_r9) { | |
| 1309 continue; | |
| 1310 } | |
| 1311 GPRsToRestore.push_back(getPhysicalRegister(i)); | |
| 1312 } | |
| 1313 } | |
| 1314 if (!GPRsToRestore.empty()) | |
| 1315 _pop(GPRsToRestore); | |
| 1316 | 1407 |
| 1317 if (!Ctx->getFlags().getUseSandboxing()) | 1408 if (!Ctx->getFlags().getUseSandboxing()) |
| 1318 return; | 1409 return; |
| 1319 | 1410 |
| 1320 // Change the original ret instruction into a sandboxed return sequence. | 1411 // Change the original ret instruction into a sandboxed return sequence. |
| 1412 // | |
| 1321 // bundle_lock | 1413 // bundle_lock |
| 1322 // bic lr, #0xc000000f | 1414 // bic lr, #0xc000000f |
| 1323 // bx lr | 1415 // bx lr |
| 1324 // bundle_unlock | 1416 // bundle_unlock |
| 1417 // | |
| 1325 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to | 1418 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to |
| 1326 // restrict to the lower 1GB as well. | 1419 // restrict to the lower 1GB as well. |
| 1327 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); | 1420 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); |
| 1328 Variable *RetValue = nullptr; | 1421 Variable *RetValue = nullptr; |
| 1329 if (RI->getSrcSize()) | 1422 if (RI->getSrcSize()) |
| 1330 RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 1423 RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 1331 | 1424 |
| 1332 Sandboxer(this).ret(LR, RetValue); | 1425 Sandboxer(this).ret(LR, RetValue); |
| 1333 | 1426 |
| 1334 RI->setDeleted(); | 1427 RI->setDeleted(); |
| (...skipping 1299 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2634 Operations)) { | 2727 Operations)) { |
| 2635 return false; | 2728 return false; |
| 2636 } | 2729 } |
| 2637 } | 2730 } |
| 2638 | 2731 |
| 2639 return Src == 0; | 2732 return Src == 0; |
| 2640 } | 2733 } |
| 2641 } // end of namespace StrengthReduction | 2734 } // end of namespace StrengthReduction |
| 2642 } // end of anonymous namespace | 2735 } // end of anonymous namespace |
| 2643 | 2736 |
| 2644 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { | 2737 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { |
| 2645 Variable *Dest = Inst->getDest(); | 2738 Variable *Dest = Instr->getDest(); |
| 2646 | 2739 |
| 2647 if (Dest->isRematerializable()) { | 2740 if (Dest->isRematerializable()) { |
| 2648 Context.insert(InstFakeDef::create(Func, Dest)); | 2741 Context.insert(InstFakeDef::create(Func, Dest)); |
| 2649 return; | 2742 return; |
| 2650 } | 2743 } |
| 2651 | 2744 |
| 2652 Type DestTy = Dest->getType(); | 2745 Type DestTy = Dest->getType(); |
| 2653 if (DestTy == IceType_i1) { | 2746 if (DestTy == IceType_i1) { |
| 2654 lowerInt1Arithmetic(Inst); | 2747 lowerInt1Arithmetic(Instr); |
| 2655 return; | 2748 return; |
| 2656 } | 2749 } |
| 2657 | 2750 |
| 2658 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2751 Operand *Src0 = legalizeUndef(Instr->getSrc(0)); |
| 2659 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 2752 Operand *Src1 = legalizeUndef(Instr->getSrc(1)); |
| 2660 if (DestTy == IceType_i64) { | 2753 if (DestTy == IceType_i64) { |
| 2661 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); | 2754 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1); |
| 2662 return; | 2755 return; |
| 2663 } | 2756 } |
| 2664 | 2757 |
| 2665 if (isVectorType(DestTy)) { | 2758 if (isVectorType(DestTy)) { |
| 2666 // Add a fake def to keep liveness consistent in the meantime. | 2759 // Add a fake def to keep liveness consistent in the meantime. |
| 2667 Variable *T = makeReg(DestTy); | 2760 Variable *T = makeReg(DestTy); |
| 2668 Context.insert(InstFakeDef::create(Func, T)); | 2761 Context.insert(InstFakeDef::create(Func, T)); |
| 2669 _mov(Dest, T); | 2762 _mov(Dest, T); |
| 2670 UnimplementedError(Func->getContext()->getFlags()); | 2763 UnimplementedError(Func->getContext()->getFlags()); |
| 2671 return; | 2764 return; |
| 2672 } | 2765 } |
| 2673 | 2766 |
| 2674 // DestTy is a non-i64 scalar. | 2767 // DestTy is a non-i64 scalar. |
| 2675 Variable *T = makeReg(DestTy); | 2768 Variable *T = makeReg(DestTy); |
| 2676 | 2769 |
| 2677 // * Handle div/rem separately. They require a non-legalized Src1 to inspect | 2770 // * Handle div/rem separately. They require a non-legalized Src1 to inspect |
| 2678 // whether or not Src1 is a non-zero constant. Once legalized it is more | 2771 // whether or not Src1 is a non-zero constant. Once legalized it is more |
| 2679 // difficult to determine (constant may be moved to a register). | 2772 // difficult to determine (constant may be moved to a register). |
| 2680 // * Handle floating point arithmetic separately: they require Src1 to be | 2773 // * Handle floating point arithmetic separately: they require Src1 to be |
| 2681 // legalized to a register. | 2774 // legalized to a register. |
| 2682 switch (Inst->getOp()) { | 2775 switch (Instr->getOp()) { |
| 2683 default: | 2776 default: |
| 2684 break; | 2777 break; |
| 2685 case InstArithmetic::Udiv: { | 2778 case InstArithmetic::Udiv: { |
| 2686 constexpr bool NotRemainder = false; | 2779 constexpr bool NotRemainder = false; |
| 2687 Variable *Src0R = legalizeToReg(Src0); | 2780 Variable *Src0R = legalizeToReg(Src0); |
| 2688 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 2781 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
| 2689 NotRemainder); | 2782 NotRemainder); |
| 2690 return; | 2783 return; |
| 2691 } | 2784 } |
| 2692 case InstArithmetic::Sdiv: { | 2785 case InstArithmetic::Sdiv: { |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 2711 return; | 2804 return; |
| 2712 } | 2805 } |
| 2713 case InstArithmetic::Frem: { | 2806 case InstArithmetic::Frem: { |
| 2714 if (!isScalarFloatingType(DestTy)) { | 2807 if (!isScalarFloatingType(DestTy)) { |
| 2715 llvm::report_fatal_error("Unexpected type when lowering frem."); | 2808 llvm::report_fatal_error("Unexpected type when lowering frem."); |
| 2716 } | 2809 } |
| 2717 llvm::report_fatal_error("Frem should have already been lowered."); | 2810 llvm::report_fatal_error("Frem should have already been lowered."); |
| 2718 } | 2811 } |
| 2719 case InstArithmetic::Fadd: { | 2812 case InstArithmetic::Fadd: { |
| 2720 Variable *Src0R = legalizeToReg(Src0); | 2813 Variable *Src0R = legalizeToReg(Src0); |
| 2814 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { | |
| 2815 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); | |
| 2816 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); | |
| 2817 _vmla(Src0R, Src1R, Src2R); | |
| 2818 _mov(Dest, Src0R); | |
| 2819 return; | |
| 2820 } | |
| 2821 | |
| 2721 Variable *Src1R = legalizeToReg(Src1); | 2822 Variable *Src1R = legalizeToReg(Src1); |
| 2722 _vadd(T, Src0R, Src1R); | 2823 _vadd(T, Src0R, Src1R); |
| 2723 _mov(Dest, T); | 2824 _mov(Dest, T); |
| 2724 return; | 2825 return; |
| 2725 } | 2826 } |
| 2726 case InstArithmetic::Fsub: { | 2827 case InstArithmetic::Fsub: { |
| 2727 Variable *Src0R = legalizeToReg(Src0); | 2828 Variable *Src0R = legalizeToReg(Src0); |
| 2829 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { | |
| 2830 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); | |
| 2831 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); | |
| 2832 _vmls(Src0R, Src1R, Src2R); | |
| 2833 _mov(Dest, Src0R); | |
| 2834 return; | |
| 2835 } | |
| 2728 Variable *Src1R = legalizeToReg(Src1); | 2836 Variable *Src1R = legalizeToReg(Src1); |
| 2729 _vsub(T, Src0R, Src1R); | 2837 _vsub(T, Src0R, Src1R); |
| 2730 _mov(Dest, T); | 2838 _mov(Dest, T); |
| 2731 return; | 2839 return; |
| 2732 } | 2840 } |
| 2733 case InstArithmetic::Fmul: { | 2841 case InstArithmetic::Fmul: { |
| 2734 Variable *Src0R = legalizeToReg(Src0); | 2842 Variable *Src0R = legalizeToReg(Src0); |
| 2735 Variable *Src1R = legalizeToReg(Src1); | 2843 Variable *Src1R = legalizeToReg(Src1); |
| 2736 _vmul(T, Src0R, Src1R); | 2844 _vmul(T, Src0R, Src1R); |
| 2737 _mov(Dest, T); | 2845 _mov(Dest, T); |
| 2738 return; | 2846 return; |
| 2739 } | 2847 } |
| 2740 case InstArithmetic::Fdiv: { | 2848 case InstArithmetic::Fdiv: { |
| 2741 Variable *Src0R = legalizeToReg(Src0); | 2849 Variable *Src0R = legalizeToReg(Src0); |
| 2742 Variable *Src1R = legalizeToReg(Src1); | 2850 Variable *Src1R = legalizeToReg(Src1); |
| 2743 _vdiv(T, Src0R, Src1R); | 2851 _vdiv(T, Src0R, Src1R); |
| 2744 _mov(Dest, T); | 2852 _mov(Dest, T); |
| 2745 return; | 2853 return; |
| 2746 } | 2854 } |
| 2747 } | 2855 } |
| 2748 | 2856 |
| 2749 // Handle everything else here. | 2857 // Handle everything else here. |
| 2750 Int32Operands Srcs(Src0, Src1); | 2858 Int32Operands Srcs(Src0, Src1); |
| 2751 switch (Inst->getOp()) { | 2859 switch (Instr->getOp()) { |
| 2752 case InstArithmetic::_num: | 2860 case InstArithmetic::_num: |
| 2753 llvm::report_fatal_error("Unknown arithmetic operator"); | 2861 llvm::report_fatal_error("Unknown arithmetic operator"); |
| 2754 return; | 2862 return; |
| 2755 case InstArithmetic::Add: { | 2863 case InstArithmetic::Add: { |
| 2864 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { | |
| 2865 Variable *Src0R = legalizeToReg(Src0); | |
| 2866 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); | |
| 2867 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); | |
| 2868 _mla(T, Src1R, Src2R, Src0R); | |
| 2869 _mov(Dest, T); | |
| 2870 return; | |
| 2871 } | |
| 2872 | |
| 2756 if (Srcs.hasConstOperand()) { | 2873 if (Srcs.hasConstOperand()) { |
| 2757 if (!Srcs.immediateIsFlexEncodable() && | 2874 if (!Srcs.immediateIsFlexEncodable() && |
| 2758 Srcs.negatedImmediateIsFlexEncodable()) { | 2875 Srcs.negatedImmediateIsFlexEncodable()) { |
| 2759 Variable *Src0R = Srcs.src0R(this); | 2876 Variable *Src0R = Srcs.src0R(this); |
| 2760 Operand *Src1F = Srcs.negatedSrc1F(this); | 2877 Operand *Src1F = Srcs.negatedSrc1F(this); |
| 2761 if (!Srcs.swappedOperands()) { | 2878 if (!Srcs.swappedOperands()) { |
| 2762 _sub(T, Src0R, Src1F); | 2879 _sub(T, Src0R, Src1F); |
| 2763 } else { | 2880 } else { |
| 2764 _rsb(T, Src0R, Src1F); | 2881 _rsb(T, Src0R, Src1F); |
| 2765 } | 2882 } |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2798 return; | 2915 return; |
| 2799 } | 2916 } |
| 2800 case InstArithmetic::Xor: { | 2917 case InstArithmetic::Xor: { |
| 2801 Variable *Src0R = Srcs.src0R(this); | 2918 Variable *Src0R = Srcs.src0R(this); |
| 2802 Operand *Src1RF = Srcs.src1RF(this); | 2919 Operand *Src1RF = Srcs.src1RF(this); |
| 2803 _eor(T, Src0R, Src1RF); | 2920 _eor(T, Src0R, Src1RF); |
| 2804 _mov(Dest, T); | 2921 _mov(Dest, T); |
| 2805 return; | 2922 return; |
| 2806 } | 2923 } |
| 2807 case InstArithmetic::Sub: { | 2924 case InstArithmetic::Sub: { |
| 2925 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { | |
| 2926 Variable *Src0R = legalizeToReg(Src0); | |
| 2927 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); | |
| 2928 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); | |
| 2929 _mls(T, Src1R, Src2R, Src0R); | |
| 2930 _mov(Dest, T); | |
| 2931 return; | |
| 2932 } | |
| 2933 | |
| 2808 if (Srcs.hasConstOperand()) { | 2934 if (Srcs.hasConstOperand()) { |
| 2809 if (Srcs.immediateIsFlexEncodable()) { | 2935 if (Srcs.immediateIsFlexEncodable()) { |
| 2810 Variable *Src0R = Srcs.src0R(this); | 2936 Variable *Src0R = Srcs.src0R(this); |
| 2811 Operand *Src1RF = Srcs.src1RF(this); | 2937 Operand *Src1RF = Srcs.src1RF(this); |
| 2812 if (Srcs.swappedOperands()) { | 2938 if (Srcs.swappedOperands()) { |
| 2813 _rsb(T, Src0R, Src1RF); | 2939 _rsb(T, Src0R, Src1RF); |
| 2814 } else { | 2940 } else { |
| 2815 _sub(T, Src0R, Src1RF); | 2941 _sub(T, Src0R, Src1RF); |
| 2816 } | 2942 } |
| 2817 _mov(Dest, T); | 2943 _mov(Dest, T); |
| (...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3006 } | 3132 } |
| 3007 _mov(Dest, NewSrc); | 3133 _mov(Dest, NewSrc); |
| 3008 } | 3134 } |
| 3009 | 3135 |
| 3010 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( | 3136 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( |
| 3011 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, | 3137 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, |
| 3012 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { | 3138 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { |
| 3013 InstARM32Label *NewShortCircuitLabel = nullptr; | 3139 InstARM32Label *NewShortCircuitLabel = nullptr; |
| 3014 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 3140 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); |
| 3015 | 3141 |
| 3016 const Inst *Producer = BoolComputations.getProducerOf(Boolean); | 3142 const Inst *Producer = Computations.getProducerOf(Boolean); |
| 3017 | 3143 |
| 3018 if (Producer == nullptr) { | 3144 if (Producer == nullptr) { |
| 3019 // No producer, no problem: just emit code to perform (Boolean & 1) and | 3145 // No producer, no problem: just emit code to perform (Boolean & 1) and |
| 3020 // set the flags register. The branch should be taken if the resulting flags | 3146 // set the flags register. The branch should be taken if the resulting flags |
| 3021 // indicate a non-zero result. | 3147 // indicate a non-zero result. |
| 3022 _tst(legalizeToReg(Boolean), _1); | 3148 _tst(legalizeToReg(Boolean), _1); |
| 3023 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); | 3149 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); |
| 3024 } | 3150 } |
| 3025 | 3151 |
| 3026 switch (Producer->getKind()) { | 3152 switch (Producer->getKind()) { |
| (...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3227 Variable *ReturnReg = nullptr; | 3353 Variable *ReturnReg = nullptr; |
| 3228 Variable *ReturnRegHi = nullptr; | 3354 Variable *ReturnRegHi = nullptr; |
| 3229 if (Dest) { | 3355 if (Dest) { |
| 3230 switch (Dest->getType()) { | 3356 switch (Dest->getType()) { |
| 3231 case IceType_NUM: | 3357 case IceType_NUM: |
| 3232 llvm::report_fatal_error("Invalid Call dest type"); | 3358 llvm::report_fatal_error("Invalid Call dest type"); |
| 3233 break; | 3359 break; |
| 3234 case IceType_void: | 3360 case IceType_void: |
| 3235 break; | 3361 break; |
| 3236 case IceType_i1: | 3362 case IceType_i1: |
| 3237 assert(BoolComputations.getProducerOf(Dest) == nullptr); | 3363 assert(Computations.getProducerOf(Dest) == nullptr); |
| 3238 // Fall-through intended. | 3364 // Fall-through intended. |
| 3239 case IceType_i8: | 3365 case IceType_i8: |
| 3240 case IceType_i16: | 3366 case IceType_i16: |
| 3241 case IceType_i32: | 3367 case IceType_i32: |
| 3242 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); | 3368 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); |
| 3243 break; | 3369 break; |
| 3244 case IceType_i64: | 3370 case IceType_i64: |
| 3245 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); | 3371 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); |
| 3246 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); | 3372 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); |
| 3247 break; | 3373 break; |
| (...skipping 2157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5405 } | 5531 } |
| 5406 | 5532 |
| 5407 if (llvm::isa<Constant>(From)) { | 5533 if (llvm::isa<Constant>(From)) { |
| 5408 if (llvm::isa<ConstantUndef>(From)) { | 5534 if (llvm::isa<ConstantUndef>(From)) { |
| 5409 From = legalizeUndef(From, RegNum); | 5535 From = legalizeUndef(From, RegNum); |
| 5410 if (isVectorType(Ty)) | 5536 if (isVectorType(Ty)) |
| 5411 return From; | 5537 return From; |
| 5412 } | 5538 } |
| 5413 // There should be no constants of vector type (other than undef). | 5539 // There should be no constants of vector type (other than undef). |
| 5414 assert(!isVectorType(Ty)); | 5540 assert(!isVectorType(Ty)); |
| 5415 bool CanBeFlex = Allowed & Legal_Flex; | |
| 5416 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { | 5541 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { |
| 5417 uint32_t RotateAmt; | 5542 uint32_t RotateAmt; |
| 5418 uint32_t Immed_8; | 5543 uint32_t Immed_8; |
| 5419 uint32_t Value = static_cast<uint32_t>(C32->getValue()); | 5544 uint32_t Value = static_cast<uint32_t>(C32->getValue()); |
| 5420 // Check if the immediate will fit in a Flexible second operand, if a | 5545 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { |
| 5421 // Flexible second operand is allowed. We need to know the exact value, | 5546 // The immediate can be encoded as a Flex immediate. We may return the |
| 5422 // so that rules out relocatable constants. Also try the inverse and use | 5547 // Flex operand if the caller has Allow'ed it. |
| 5423 // MVN if possible. | 5548 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); |
| 5424 if (CanBeFlex && | 5549 const bool CanBeFlex = Allowed & Legal_Flex; |
| 5425 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { | 5550 if (CanBeFlex) |
| 5426 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 5551 return OpF; |
| 5427 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( | 5552 return copyToReg(OpF, RegNum); |
| 5428 ~Value, &RotateAmt, &Immed_8)) { | 5553 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt, |
|
Jim Stichnoth
2015/12/07 20:58:14
No "else if" after unconditional return.
John
2015/12/08 13:54:24
In my defense, this was here prior to this CL. :P
| |
| 5429 auto InvertedFlex = | 5554 &Immed_8)) { |
| 5555 // Even though the immediate can't be encoded as a Flex operand, its | |
| 5556 // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit | |
| 5557 // constant with a single instruction. | |
| 5558 auto *InvOpF = | |
| 5430 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 5559 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); |
| 5431 Variable *Reg = makeReg(Ty, RegNum); | 5560 Variable *Reg = makeReg(Ty, RegNum); |
| 5432 _mvn(Reg, InvertedFlex); | 5561 _mvn(Reg, InvOpF); |
| 5433 return Reg; | 5562 return Reg; |
| 5434 } else { | 5563 } else { |
| 5435 // Do a movw/movt to a register. | 5564 // Do a movw/movt to a register. |
| 5436 Variable *Reg = makeReg(Ty, RegNum); | 5565 Variable *Reg = makeReg(Ty, RegNum); |
| 5437 uint32_t UpperBits = (Value >> 16) & 0xFFFF; | 5566 uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
| 5438 _movw(Reg, | 5567 _movw(Reg, |
| 5439 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); | 5568 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); |
| 5440 if (UpperBits != 0) { | 5569 if (UpperBits != 0) { |
| 5441 _movt(Reg, Ctx->getConstantInt32(UpperBits)); | 5570 _movt(Reg, Ctx->getConstantInt32(UpperBits)); |
| 5442 } | 5571 } |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5479 return copyToReg(From, RegNum); | 5608 return copyToReg(From, RegNum); |
| 5480 } | 5609 } |
| 5481 } | 5610 } |
| 5482 | 5611 |
| 5483 if (auto *Var = llvm::dyn_cast<Variable>(From)) { | 5612 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
| 5484 if (Var->isRematerializable()) { | 5613 if (Var->isRematerializable()) { |
| 5485 if (Allowed & Legal_Rematerializable) { | 5614 if (Allowed & Legal_Rematerializable) { |
| 5486 return From; | 5615 return From; |
| 5487 } | 5616 } |
| 5488 | 5617 |
| 5489 // TODO(jpp): We don't need to rematerialize Var if legalize() was invoked | |
| 5490 // for a Variable in a Mem operand. | |
| 5491 Variable *T = makeReg(Var->getType(), RegNum); | 5618 Variable *T = makeReg(Var->getType(), RegNum); |
| 5492 _mov(T, Var); | 5619 _mov(T, Var); |
| 5493 return T; | 5620 return T; |
| 5494 } | 5621 } |
| 5495 // Check if the variable is guaranteed a physical register. This can happen | 5622 // Check if the variable is guaranteed a physical register. This can happen |
| 5496 // either when the variable is pre-colored or when it is assigned infinite | 5623 // either when the variable is pre-colored or when it is assigned infinite |
| 5497 // weight. | 5624 // weight. |
| 5498 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 5625 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
| 5499 // We need a new physical register for the operand if: | 5626 // We need a new physical register for the operand if: |
| 5500 // Mem is not allowed and Var isn't guaranteed a physical | 5627 // Mem is not allowed and Var isn't guaranteed a physical |
| (...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5681 | 5808 |
| 5682 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex)); | 5809 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex)); |
| 5683 if (DestHi) { | 5810 if (DestHi) { |
| 5684 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex)); | 5811 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex)); |
| 5685 } | 5812 } |
| 5686 | 5813 |
| 5687 CondWhenTrue Cond(CondARM32::kNone); | 5814 CondWhenTrue Cond(CondARM32::kNone); |
| 5688 // FlagsWereSet is used to determine wether Boolean was folded or not. If not, | 5815 // FlagsWereSet is used to determine wether Boolean was folded or not. If not, |
| 5689 // add an explicit _tst instruction below. | 5816 // add an explicit _tst instruction below. |
| 5690 bool FlagsWereSet = false; | 5817 bool FlagsWereSet = false; |
| 5691 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { | 5818 if (const Inst *Producer = Computations.getProducerOf(Boolean)) { |
| 5692 switch (Producer->getKind()) { | 5819 switch (Producer->getKind()) { |
| 5693 default: | 5820 default: |
| 5694 llvm::report_fatal_error("Unexpected producer."); | 5821 llvm::report_fatal_error("Unexpected producer."); |
| 5695 case Inst::Icmp: { | 5822 case Inst::Icmp: { |
| 5696 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); | 5823 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); |
| 5697 FlagsWereSet = true; | 5824 FlagsWereSet = true; |
| 5698 } break; | 5825 } break; |
| 5699 case Inst::Fcmp: { | 5826 case Inst::Fcmp: { |
| 5700 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); | 5827 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); |
| 5701 FlagsWereSet = true; | 5828 FlagsWereSet = true; |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5765 | 5892 |
| 5766 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest, | 5893 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest, |
| 5767 Operand *Boolean) { | 5894 Operand *Boolean) { |
| 5768 assert(Boolean->getType() == IceType_i1); | 5895 assert(Boolean->getType() == IceType_i1); |
| 5769 Variable *T = makeReg(IceType_i1); | 5896 Variable *T = makeReg(IceType_i1); |
| 5770 Operand *_0 = | 5897 Operand *_0 = |
| 5771 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex); | 5898 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex); |
| 5772 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 5899 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); |
| 5773 | 5900 |
| 5774 SafeBoolChain Safe = SBC_Yes; | 5901 SafeBoolChain Safe = SBC_Yes; |
| 5775 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { | 5902 if (const Inst *Producer = Computations.getProducerOf(Boolean)) { |
| 5776 switch (Producer->getKind()) { | 5903 switch (Producer->getKind()) { |
| 5777 default: | 5904 default: |
| 5778 llvm::report_fatal_error("Unexpected producer."); | 5905 llvm::report_fatal_error("Unexpected producer."); |
| 5779 case Inst::Icmp: { | 5906 case Inst::Icmp: { |
| 5780 _mov(T, _0); | 5907 _mov(T, _0); |
| 5781 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); | 5908 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); |
| 5782 assert(Cond.WhenTrue0 != CondARM32::AL); | 5909 assert(Cond.WhenTrue0 != CondARM32::AL); |
| 5783 assert(Cond.WhenTrue0 != CondARM32::kNone); | 5910 assert(Cond.WhenTrue0 != CondARM32::kNone); |
| 5784 assert(Cond.WhenTrue1 == CondARM32::kNone); | 5911 assert(Cond.WhenTrue1 == CondARM32::kNone); |
| 5785 _mov_redefined(T, _1, Cond.WhenTrue0); | 5912 _mov_redefined(T, _1, Cond.WhenTrue0); |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5877 return false; | 6004 return false; |
| 5878 case InstArithmetic::And: | 6005 case InstArithmetic::And: |
| 5879 return !isVectorType(Instr.getDest()->getType()); | 6006 return !isVectorType(Instr.getDest()->getType()); |
| 5880 case InstArithmetic::Or: | 6007 case InstArithmetic::Or: |
| 5881 return !isVectorType(Instr.getDest()->getType()); | 6008 return !isVectorType(Instr.getDest()->getType()); |
| 5882 } | 6009 } |
| 5883 } | 6010 } |
| 5884 } | 6011 } |
| 5885 } | 6012 } |
| 5886 } // end of namespace BoolFolding | 6013 } // end of namespace BoolFolding |
| 6014 | |
| 6015 namespace FpFolding { | |
| 6016 bool shouldTrackProducer(const Inst &Instr) { | |
| 6017 switch (Instr.getKind()) { | |
| 6018 default: | |
| 6019 return false; | |
| 6020 case Inst::Arithmetic: { | |
| 6021 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | |
| 6022 default: | |
| 6023 return false; | |
| 6024 case InstArithmetic::Fmul: | |
| 6025 return true; | |
| 6026 } | |
| 6027 } | |
| 6028 } | |
| 6029 } | |
| 6030 | |
| 6031 bool isValidConsumer(const Inst &Instr) { | |
| 6032 switch (Instr.getKind()) { | |
| 6033 default: | |
| 6034 return false; | |
| 6035 case Inst::Arithmetic: { | |
| 6036 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | |
| 6037 default: | |
| 6038 return false; | |
| 6039 case InstArithmetic::Fadd: | |
| 6040 case InstArithmetic::Fsub: | |
| 6041 return true; | |
| 6042 } | |
| 6043 } | |
| 6044 } | |
| 6045 } | |
| 6046 } // end of namespace FpFolding | |
| 6047 | |
| 6048 namespace IntFolding { | |
| 6049 bool shouldTrackProducer(const Inst &Instr) { | |
| 6050 switch (Instr.getKind()) { | |
| 6051 default: | |
| 6052 return false; | |
| 6053 case Inst::Arithmetic: { | |
| 6054 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | |
| 6055 default: | |
| 6056 return false; | |
| 6057 case InstArithmetic::Mul: | |
| 6058 return true; | |
| 6059 } | |
| 6060 } | |
| 6061 } | |
| 6062 } | |
| 6063 | |
| 6064 bool isValidConsumer(const Inst &Instr) { | |
| 6065 switch (Instr.getKind()) { | |
| 6066 default: | |
| 6067 return false; | |
| 6068 case Inst::Arithmetic: { | |
| 6069 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { | |
| 6070 default: | |
| 6071 return false; | |
| 6072 case InstArithmetic::Add: | |
| 6073 case InstArithmetic::Sub: | |
| 6074 return true; | |
| 6075 } | |
| 6076 } | |
| 6077 } | |
| 6078 } | |
| 6079 } // end of namespace IntFolding | |
| 5887 } // end of anonymous namespace | 6080 } // end of anonymous namespace |
| 5888 | 6081 |
| 5889 void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { | 6082 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) { |
| 5890 for (Inst &Instr : Node->getInsts()) { | 6083 for (Inst &Instr : Node->getInsts()) { |
| 5891 // Check whether Instr is a valid producer. | 6084 // Check whether Instr is a valid producer. |
| 5892 Variable *Dest = Instr.getDest(); | 6085 Variable *Dest = Instr.getDest(); |
| 5893 if (!Instr.isDeleted() // only consider non-deleted instructions; and | 6086 if (!Instr.isDeleted() // only consider non-deleted instructions; and |
| 5894 && Dest // only instructions with an actual dest var; and | 6087 && Dest // only instructions with an actual dest var; and |
| 5895 && Dest->getType() == IceType_i1 // only bool-type dest vars; and | 6088 && Dest->getType() == IceType_i1 // only bool-type dest vars; and |
| 5896 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. | 6089 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. |
| 5897 KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr)); | 6090 KnownComputations.emplace(Dest->getIndex(), |
| 6091 ComputationEntry(&Instr, IceType_i1)); | |
| 6092 } | |
| 6093 if (!Instr.isDeleted() // only consider non-deleted instructions; and | |
| 6094 && Dest // only instructions with an actual dest var; and | |
| 6095 && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and | |
| 6096 && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr. | |
| 6097 KnownComputations.emplace(Dest->getIndex(), | |
| 6098 ComputationEntry(&Instr, Dest->getType())); | |
| 6099 } | |
| 6100 if (!Instr.isDeleted() // only consider non-deleted instructions; and | |
| 6101 && Dest // only instructions with an actual dest var; and | |
| 6102 && Dest->getType() == IceType_i32 // i32 only dest vars; and | |
| 6103 && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr. | |
| 6104 KnownComputations.emplace(Dest->getIndex(), | |
| 6105 ComputationEntry(&Instr, IceType_i32)); | |
| 5898 } | 6106 } |
| 5899 // Check each src variable against the map. | 6107 // Check each src variable against the map. |
| 5900 FOREACH_VAR_IN_INST(Var, Instr) { | 6108 FOREACH_VAR_IN_INST(Var, Instr) { |
| 5901 SizeT VarNum = Var->getIndex(); | 6109 SizeT VarNum = Var->getIndex(); |
| 5902 auto ComputationIter = KnownComputations.find(VarNum); | 6110 auto ComputationIter = KnownComputations.find(VarNum); |
| 5903 if (ComputationIter == KnownComputations.end()) { | 6111 if (ComputationIter == KnownComputations.end()) { |
| 5904 continue; | 6112 continue; |
| 5905 } | 6113 } |
| 5906 | 6114 |
| 5907 ++ComputationIter->second.NumUses; | 6115 ++ComputationIter->second.NumUses; |
| 5908 if (!BoolFolding::isValidConsumer(Instr)) { | 6116 switch (ComputationIter->second.ComputationType) { |
| 6117 default: | |
| 5909 KnownComputations.erase(VarNum); | 6118 KnownComputations.erase(VarNum); |
| 5910 continue; | 6119 continue; |
| 6120 case IceType_i1: | |
| 6121 if (!BoolFolding::isValidConsumer(Instr)) { | |
| 6122 KnownComputations.erase(VarNum); | |
| 6123 continue; | |
| 6124 } | |
| 6125 break; | |
| 6126 case IceType_i32: | |
| 6127 if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) { | |
| 6128 KnownComputations.erase(VarNum); | |
| 6129 continue; | |
| 6130 } | |
| 6131 break; | |
| 6132 case IceType_f32: | |
| 6133 case IceType_f64: | |
| 6134 if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) { | |
| 6135 KnownComputations.erase(VarNum); | |
| 6136 continue; | |
| 6137 } | |
| 6138 break; | |
| 5911 } | 6139 } |
| 5912 | 6140 |
| 5913 if (Instr.isLastUse(Var)) { | 6141 if (Instr.isLastUse(Var)) { |
| 5914 ComputationIter->second.IsLiveOut = false; | 6142 ComputationIter->second.IsLiveOut = false; |
| 5915 } | 6143 } |
| 5916 } | 6144 } |
| 5917 } | 6145 } |
| 5918 | 6146 |
| 5919 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); | 6147 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); |
| 5920 Iter != End;) { | 6148 Iter != End;) { |
| (...skipping 325 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6246 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 6474 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
| 6247 // However, for compatibility with current NaCl LLVM, don't claim that. | 6475 // However, for compatibility with current NaCl LLVM, don't claim that. |
| 6248 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 6476 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
| 6249 } | 6477 } |
| 6250 | 6478 |
| 6251 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; | 6479 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; |
| 6252 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 6480 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
| 6253 llvm::SmallBitVector TargetARM32::ScratchRegs; | 6481 llvm::SmallBitVector TargetARM32::ScratchRegs; |
| 6254 | 6482 |
| 6255 } // end of namespace Ice | 6483 } // end of namespace Ice |
| OLD | NEW |