| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 858 matching lines...) |
| 869 } | 869 } |
| 870 | 870 |
| 871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { | 871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { |
| 872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
| 873 return false; | 873 return false; |
| 874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; | 874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; |
| 875 ++NumGPRRegsUsed; | 875 ++NumGPRRegsUsed; |
| 876 return true; | 876 return true; |
| 877 } | 877 } |
| 878 | 878 |
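A brief worked example of the GPR assignment above (assuming ARM32_MAX_GPR_ARG is 4, i.e. the AAPCS core argument registers r0-r3):

  // Hypothetical call with five i32 arguments, under that assumption:
  //   arg0 -> r0, arg1 -> r1, arg2 -> r2, arg3 -> r3
  //   arg4 -> I32InReg() returns false, so it is passed on the stack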
| 879 // The calling convention helper class (TargetARM32::CallingConv) expects the |
| 880 // following registers to be declared in a certain order, so we have these |
| 881 // sanity checks to ensure nothing breaks unknowingly. |
| 882 // TODO(jpp): modify the CallingConv class so it does not rely on any register |
| 883 // declaration order. |
| 884 #define SANITY_CHECK_QS(_0, _1) \ |
| 885 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \ |
| 886 "ARM32 " #_0 " and " #_1 " registers are declared " \ |
| 887 "incorrectly.") |
| 888 SANITY_CHECK_QS(q0, q1); |
| 889 SANITY_CHECK_QS(q1, q2); |
| 890 SANITY_CHECK_QS(q2, q3); |
| 891 SANITY_CHECK_QS(q3, q4); |
| 892 #undef SANITY_CHECK_QS |
| 893 #define SANITY_CHECK_DS(_0, _1) \ |
| 894 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \ |
| 895 "ARM32 " #_0 " and " #_1 " registers are declared " \ |
| 896 "incorrectly.") |
| 897 SANITY_CHECK_DS(d0, d1); |
| 898 SANITY_CHECK_DS(d1, d2); |
| 899 SANITY_CHECK_DS(d2, d3); |
| 900 SANITY_CHECK_DS(d3, d4); |
| 901 SANITY_CHECK_DS(d4, d5); |
| 902 SANITY_CHECK_DS(d5, d6); |
| 903 SANITY_CHECK_DS(d6, d7); |
| 904 SANITY_CHECK_DS(d7, d8); |
| 905 #undef SANITY_CHECK_DS |
| 906 #define SANITY_CHECK_SS(_0, _1) \ |
| 907 static_assert((RegARM32::Reg_##_0 + 1) == RegARM32::Reg_##_1, \ |
| 908 "ARM32 " #_0 " and " #_1 " registers are declared " \ |
| 909 "incorrectly.") |
| 910 SANITY_CHECK_SS(s0, s1); |
| 911 SANITY_CHECK_SS(s1, s2); |
| 912 SANITY_CHECK_SS(s2, s3); |
| 913 SANITY_CHECK_SS(s3, s4); |
| 914 SANITY_CHECK_SS(s4, s5); |
| 915 SANITY_CHECK_SS(s5, s6); |
| 916 SANITY_CHECK_SS(s6, s7); |
| 917 SANITY_CHECK_SS(s7, s8); |
| 918 SANITY_CHECK_SS(s8, s9); |
| 919 SANITY_CHECK_SS(s9, s10); |
| 920 SANITY_CHECK_SS(s10, s11); |
| 921 SANITY_CHECK_SS(s11, s12); |
| 922 SANITY_CHECK_SS(s12, s13); |
| 923 SANITY_CHECK_SS(s13, s14); |
| 924 SANITY_CHECK_SS(s14, s15); |
| 925 #undef SANITY_CHECK_SS |
| 926 |
| 879 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { | 927 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { |
| 880 if (!VFPRegsFree.any()) { | 928 if (!VFPRegsFree.any()) { |
| 881 return false; | 929 return false; |
| 882 } | 930 } |
| 883 | 931 |
| 884 if (isVectorType(Ty)) { | 932 if (isVectorType(Ty)) { |
| 885 // Q registers are declared in reverse order, so RegARM32::Reg_q0 > | 933 // Q registers are declared in reverse order, so RegARM32::Reg_q0 > |
| 886 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0. | 934 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0. |
| 887 // Same thing goes for D registers. | 935 // Same thing goes for D registers. |
| 888 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, | |
| 889 "ARM32 Q registers are possibly declared incorrectly."); | |
| 890 | |
| 891 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first(); | 936 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first(); |
| 892 if (QRegStart >= 0) { | 937 if (QRegStart >= 0) { |
| 893 VFPRegsFree.reset(QRegStart, QRegStart + 4); | 938 VFPRegsFree.reset(QRegStart, QRegStart + 4); |
| 894 *Reg = RegARM32::Reg_q0 - (QRegStart / 4); | 939 *Reg = RegARM32::Reg_q0 - (QRegStart / 4); |
| 895 return true; | 940 return true; |
| 896 } | 941 } |
| 897 } else if (Ty == IceType_f64) { | 942 } else if (Ty == IceType_f64) { |
| 898 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1, | |
| 899 "ARM32 D registers are possibly declared incorrectly."); | |
| 900 | |
| 901 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first(); | 943 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first(); |
| 902 if (DRegStart >= 0) { | 944 if (DRegStart >= 0) { |
| 903 VFPRegsFree.reset(DRegStart, DRegStart + 2); | 945 VFPRegsFree.reset(DRegStart, DRegStart + 2); |
| 904 *Reg = RegARM32::Reg_d0 - (DRegStart / 2); | 946 *Reg = RegARM32::Reg_d0 - (DRegStart / 2); |
| 905 return true; | 947 return true; |
| 906 } | 948 } |
| 907 } else { | 949 } else { |
| 908 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1, | |
| 909 "ARM32 S registers are possibly declared incorrectly."); | |
| 910 | |
| 911 assert(Ty == IceType_f32); | 950 assert(Ty == IceType_f32); |
| 912 int32_t SReg = VFPRegsFree.find_first(); | 951 int32_t SReg = VFPRegsFree.find_first(); |
| 913 assert(SReg >= 0); | 952 assert(SReg >= 0); |
| 914 VFPRegsFree.reset(SReg); | 953 VFPRegsFree.reset(SReg); |
| 915 *Reg = RegARM32::Reg_s0 + SReg; | 954 *Reg = RegARM32::Reg_s0 + SReg; |
| 916 return true; | 955 return true; |
| 917 } | 956 } |
| 918 | 957 |
| 919 // Parameter allocation failed. From now on, every fp argument must be placed | 958 // Parameter allocation failed. From now on, every fp argument must be placed |
| 920 // on the stack. We clear VFPRegsFree in case there are any "holes" from S and | 959 // on the stack. We clear VFPRegsFree in case there are any "holes" from S and |
| (...skipping 168 matching lines...) |
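A worked example of the index arithmetic in FPInReg, assuming one VFPRegsFree bit per S register and the descending Q/D declaration order checked above (the numbers are illustrative, not from the CL):

  // Suppose s0..s3 are already used, so the first free bit is 4:
  //   f32  argument: SReg = 4       -> Reg_s0 + 4       == s4
  //   f64  argument: DRegStart = 4  -> Reg_d0 - (4 / 2)  == d2 (aliases s4, s5)
  //   v128 argument: QRegStart = 4  -> Reg_q0 - (4 / 4)  == q1 (aliases s4..s7)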
| 1089 | 1128 |
| 1090 // Compute the list of spilled variables and bounds for GlobalsSize, etc. | 1129 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
| 1091 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | 1130 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
| 1092 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | 1131 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
| 1093 &LocalsSlotsAlignmentBytes, TargetVarHook); | 1132 &LocalsSlotsAlignmentBytes, TargetVarHook); |
| 1094 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | 1133 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| 1095 SpillAreaSizeBytes += GlobalsSize; | 1134 SpillAreaSizeBytes += GlobalsSize; |
| 1096 | 1135 |
| 1097 // Add push instructions for preserved registers. On ARM, "push" can push a | 1136 // Add push instructions for preserved registers. On ARM, "push" can push a |
| 1098 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has | 1137 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has |
| 1099 // callee-saved float/vector registers. The "vpush" instruction can handle a | 1138 // callee-saved float/vector registers. |
| 1100 // whole list of float/vector registers, but it only handles contiguous | 1139 // |
| 1101 // sequences of registers by specifying the start and the length. | 1140 // The "vpush" instruction can handle a whole list of float/vector registers, |
| 1102 VarList GPRsToPreserve; | 1141 // but it only handles contiguous sequences of registers by specifying the |
| 1103 GPRsToPreserve.reserve(CalleeSaves.size()); | 1142 // start and the length. |
| 1104 uint32_t NumCallee = 0; | 1143 PreservedGPRs.reserve(CalleeSaves.size()); |
| 1105 size_t PreservedRegsSizeBytes = 0; | 1144 PreservedSRegs.reserve(CalleeSaves.size()); |
| 1145 |
| 1106 // Consider FP and LR as callee-save / used as needed. | 1146 // Consider FP and LR as callee-save / used as needed. |
| 1107 if (UsesFramePointer) { | 1147 if (UsesFramePointer) { |
| 1148 if (RegsUsed[RegARM32::Reg_fp]) { |
| 1149 llvm::report_fatal_error("Frame pointer has been used."); |
| 1150 } |
| 1108 CalleeSaves[RegARM32::Reg_fp] = true; | 1151 CalleeSaves[RegARM32::Reg_fp] = true; |
| 1109 assert(RegsUsed[RegARM32::Reg_fp] == false); | |
| 1110 RegsUsed[RegARM32::Reg_fp] = true; | 1152 RegsUsed[RegARM32::Reg_fp] = true; |
| 1111 } | 1153 } |
| 1112 if (!MaybeLeafFunc) { | 1154 if (!MaybeLeafFunc) { |
| 1113 CalleeSaves[RegARM32::Reg_lr] = true; | 1155 CalleeSaves[RegARM32::Reg_lr] = true; |
| 1114 RegsUsed[RegARM32::Reg_lr] = true; | 1156 RegsUsed[RegARM32::Reg_lr] = true; |
| 1115 } | 1157 } |
| 1158 |
| 1159 // Make two passes over the used registers. The first pass records all the |
| 1160 // used registers -- and their aliases. Then, we figure out which GPRs and |
| 1161 // VFP S registers should be saved. We don't bother saving D/Q registers |
| 1162 // because their uses are recorded as uses of the aliased S registers. |
| 1163 llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM); |
| 1116 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 1164 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 1117 if (RegARM32::isI64RegisterPair(i)) { | 1165 if (NeedSandboxing && i == RegARM32::Reg_r9) { |
| 1118 // We don't save register pairs explicitly. Instead, we rely on the code | 1166 // r9 is never updated in sandboxed code. |
| 1119 // fake-defing/fake-using each register in the pair. | |
| 1120 continue; | 1167 continue; |
| 1121 } | 1168 } |
| 1122 if (CalleeSaves[i] && RegsUsed[i]) { | 1169 if (CalleeSaves[i] && RegsUsed[i]) { |
| 1123 if (NeedSandboxing && i == RegARM32::Reg_r9) { | 1170 ToPreserve |= RegisterAliases[i]; |
| 1124 // r9 is never updated in sandboxed code. | 1171 } |
| 1172 } |
| 1173 |
| 1174 uint32_t NumCallee = 0; |
| 1175 size_t PreservedRegsSizeBytes = 0; |
| 1176 |
| 1177 // Each entry in RegClasses is a tuple of |
| 1178 // |
| 1179 // <First Register in Class, Last Register in Class, Vector of Save Registers> |
| 1180 // |
| 1181 // We use these tuples to figure out which registers to push/pop during |
| 1182 // prolog/epilog. |
| 1183 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>; |
| 1184 const RegClassType RegClasses[] = { |
| 1185 RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last, |
| 1186 &PreservedGPRs), |
| 1187 RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last, |
| 1188 &PreservedSRegs)}; |
| 1189 for (const auto &RegClass : RegClasses) { |
| 1190 const uint32_t FirstRegInClass = std::get<0>(RegClass); |
| 1191 const uint32_t LastRegInClass = std::get<1>(RegClass); |
| 1192 VarList *const PreservedRegsInClass = std::get<2>(RegClass); |
| 1193 for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) { |
| 1194 if (!ToPreserve[Reg]) { |
| 1125 continue; | 1195 continue; |
| 1126 } | 1196 } |
| 1127 ++NumCallee; | 1197 ++NumCallee; |
| 1128 Variable *PhysicalRegister = getPhysicalRegister(i); | 1198 Variable *PhysicalRegister = getPhysicalRegister(Reg); |
| 1129 PreservedRegsSizeBytes += | 1199 PreservedRegsSizeBytes += |
| 1130 typeWidthInBytesOnStack(PhysicalRegister->getType()); | 1200 typeWidthInBytesOnStack(PhysicalRegister->getType()); |
| 1131 GPRsToPreserve.push_back(getPhysicalRegister(i)); | 1201 PreservedRegsInClass->push_back(PhysicalRegister); |
| 1132 } | 1202 } |
| 1133 } | 1203 } |
| 1204 |
| 1134 Ctx->statsUpdateRegistersSaved(NumCallee); | 1205 Ctx->statsUpdateRegistersSaved(NumCallee); |
| 1135 if (!GPRsToPreserve.empty()) | 1206 if (!PreservedSRegs.empty()) |
| 1136 _push(GPRsToPreserve); | 1207 _push(PreservedSRegs); |
| 1208 if (!PreservedGPRs.empty()) |
| 1209 _push(PreservedGPRs); |
| 1137 | 1210 |
| 1138 // Generate "mov FP, SP" if needed. | 1211 // Generate "mov FP, SP" if needed. |
| 1139 if (UsesFramePointer) { | 1212 if (UsesFramePointer) { |
| 1140 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 1213 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
| 1141 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 1214 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| 1142 _mov(FP, SP); | 1215 _mov(FP, SP); |
| 1143 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). | 1216 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
| 1144 Context.insert(InstFakeUse::create(Func, FP)); | 1217 Context.insert(InstFakeUse::create(Func, FP)); |
| 1145 } | 1218 } |
| 1146 | 1219 |
| 1147 // Align the variables area. SpillAreaPaddingBytes is the size of the region | 1220 // Align the variables area. SpillAreaPaddingBytes is the size of the region |
| 1148 // after the preserved registers and before the spill areas. | 1221 // after the preserved registers and before the spill areas. |
| 1149 // LocalsSlotsPaddingBytes is the amount of padding between the globals and | 1222 // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
| 1150 // locals area if they are separate. | 1223 // locals area if they are separate. |
| 1151 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); | 1224 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); |
| 1152 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 1225 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| 1153 uint32_t SpillAreaPaddingBytes = 0; | 1226 uint32_t SpillAreaPaddingBytes = 0; |
| 1154 uint32_t LocalsSlotsPaddingBytes = 0; | 1227 uint32_t LocalsSlotsPaddingBytes = 0; |
| 1155 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, | 1228 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, |
| 1156 GlobalsSize, LocalsSlotsAlignmentBytes, | 1229 GlobalsSize, LocalsSlotsAlignmentBytes, |
| 1157 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); | 1230 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); |
| 1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 1231 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| 1159 uint32_t GlobalsAndSubsequentPaddingSize = | 1232 uint32_t GlobalsAndSubsequentPaddingSize = |
| 1160 GlobalsSize + LocalsSlotsPaddingBytes; | 1233 GlobalsSize + LocalsSlotsPaddingBytes; |
| 1161 | 1234 |
| 1162 // Add the out args space to the stack, and align SP if necessary. | 1235 // Add the out args space to the stack, and align SP if necessary. |
| 1163 if (NeedsStackAlignment) { | 1236 if (!NeedsStackAlignment) { |
| 1237 SpillAreaSizeBytes += MaxOutArgsSizeBytes; |
| 1238 } else { |
| 1164 uint32_t StackOffset = PreservedRegsSizeBytes; | 1239 uint32_t StackOffset = PreservedRegsSizeBytes; |
| 1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 1240 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 1166 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); | 1241 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); |
| 1167 SpillAreaSizeBytes = StackSize - StackOffset; | 1242 SpillAreaSizeBytes = StackSize - StackOffset; |
| 1168 } else { | |
| 1169 SpillAreaSizeBytes += MaxOutArgsSizeBytes; | |
| 1170 } | 1243 } |
| 1171 | 1244 |
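applyStackAlignment is not part of this excerpt; a minimal sketch of a typical round-up helper and the resulting prologue arithmetic, assuming a 16-byte ARM32_STACK_ALIGNMENT_BYTES, is:

  // Hypothetical round-up helper; the real applyStackAlignment may differ.
  uint32_t applyStackAlignment(uint32_t Size) {
    constexpr uint32_t Align = 16; // assumed ARM32_STACK_ALIGNMENT_BYTES
    return (Size + Align - 1) & ~(Align - 1);
  }
  // E.g. with PreservedRegsSizeBytes = 12, SpillAreaSizeBytes = 20, and
  // MaxOutArgsSizeBytes = 8:
  //   StackSize = align(12 + 20) = 32; StackSize = align(32 + 8) = 48
  //   SpillAreaSizeBytes = 48 - 12 = 36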
| 1172 // Combine fixed alloca with SpillAreaSize. | 1245 // Combine fixed alloca with SpillAreaSize. |
| 1173 SpillAreaSizeBytes += FixedAllocaSizeBytes; | 1246 SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| 1174 | 1247 |
| 1175 // Generate "sub sp, SpillAreaSizeBytes" | 1248 // Generate "sub sp, SpillAreaSizeBytes" |
| 1176 if (SpillAreaSizeBytes) { | 1249 if (SpillAreaSizeBytes) { |
| 1177 // Use the scratch register if needed to legalize the immediate. | 1250 // Use the scratch register if needed to legalize the immediate. |
| 1178 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 1251 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| 1179 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 1252 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| (...skipping 98 matching lines...) |
| 1278 // add SP, SpillAreaSizeBytes | 1351 // add SP, SpillAreaSizeBytes |
| 1279 if (SpillAreaSizeBytes) { | 1352 if (SpillAreaSizeBytes) { |
| 1280 // Use the scratch register if needed to legalize the immediate. | 1353 // Use the scratch register if needed to legalize the immediate. |
| 1281 Operand *AddAmount = | 1354 Operand *AddAmount = |
| 1282 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 1355 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| 1283 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 1356 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| 1284 Sandboxer(this).add_sp(AddAmount); | 1357 Sandboxer(this).add_sp(AddAmount); |
| 1285 } | 1358 } |
| 1286 } | 1359 } |
| 1287 | 1360 |
| 1288 // Add pop instructions for preserved registers. | 1361 if (!PreservedGPRs.empty()) |
| 1289 llvm::SmallBitVector CalleeSaves = | 1362 _pop(PreservedGPRs); |
| 1290 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 1363 if (!PreservedSRegs.empty()) |
| 1291 VarList GPRsToRestore; | 1364 _pop(PreservedSRegs); |
| 1292 GPRsToRestore.reserve(CalleeSaves.size()); | |
| 1293 // Consider FP and LR as callee-save / used as needed. | |
| 1294 if (UsesFramePointer) { | |
| 1295 CalleeSaves[RegARM32::Reg_fp] = true; | |
| 1296 } | |
| 1297 if (!MaybeLeafFunc) { | |
| 1298 CalleeSaves[RegARM32::Reg_lr] = true; | |
| 1299 } | |
| 1300 // Pop registers in ascending order just like push (instead of in reverse | |
| 1301 // order). | |
| 1302 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | |
| 1303 if (RegARM32::isI64RegisterPair(i)) { | |
| 1304 continue; | |
| 1305 } | |
| 1306 | |
| 1307 if (CalleeSaves[i] && RegsUsed[i]) { | |
| 1308 if (NeedSandboxing && i == RegARM32::Reg_r9) { | |
| 1309 continue; | |
| 1310 } | |
| 1311 GPRsToRestore.push_back(getPhysicalRegister(i)); | |
| 1312 } | |
| 1313 } | |
| 1314 if (!GPRsToRestore.empty()) | |
| 1315 _pop(GPRsToRestore); | |
| 1316 | 1365 |
| 1317 if (!Ctx->getFlags().getUseSandboxing()) | 1366 if (!Ctx->getFlags().getUseSandboxing()) |
| 1318 return; | 1367 return; |
| 1319 | 1368 |
| 1320 // Change the original ret instruction into a sandboxed return sequence. | 1369 // Change the original ret instruction into a sandboxed return sequence. |
| 1370 // |
| 1321 // bundle_lock | 1371 // bundle_lock |
| 1322 // bic lr, #0xc000000f | 1372 // bic lr, #0xc000000f |
| 1323 // bx lr | 1373 // bx lr |
| 1324 // bundle_unlock | 1374 // bundle_unlock |
| 1375 // |
| 1325 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to | 1376 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to |
| 1326 // restrict to the lower 1GB as well. | 1377 // restrict to the lower 1GB as well. |
| 1327 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); | 1378 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); |
| 1328 Variable *RetValue = nullptr; | 1379 Variable *RetValue = nullptr; |
| 1329 if (RI->getSrcSize()) | 1380 if (RI->getSrcSize()) |
| 1330 RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 1381 RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 1331 | 1382 |
| 1332 Sandboxer(this).ret(LR, RetValue); | 1383 Sandboxer(this).ret(LR, RetValue); |
| 1333 | 1384 |
| 1334 RI->setDeleted(); | 1385 RI->setDeleted(); |
| (...skipping 1299 matching lines...) |
| 2634 Operations)) { | 2685 Operations)) { |
| 2635 return false; | 2686 return false; |
| 2636 } | 2687 } |
| 2637 } | 2688 } |
| 2638 | 2689 |
| 2639 return Src == 0; | 2690 return Src == 0; |
| 2640 } | 2691 } |
| 2641 } // end of namespace StrengthReduction | 2692 } // end of namespace StrengthReduction |
| 2642 } // end of anonymous namespace | 2693 } // end of anonymous namespace |
| 2643 | 2694 |
| 2644 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { | 2695 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { |
| 2645 Variable *Dest = Inst->getDest(); | 2696 Variable *Dest = Instr->getDest(); |
| 2646 | 2697 |
| 2647 if (Dest->isRematerializable()) { | 2698 if (Dest->isRematerializable()) { |
| 2648 Context.insert(InstFakeDef::create(Func, Dest)); | 2699 Context.insert(InstFakeDef::create(Func, Dest)); |
| 2649 return; | 2700 return; |
| 2650 } | 2701 } |
| 2651 | 2702 |
| 2652 Type DestTy = Dest->getType(); | 2703 Type DestTy = Dest->getType(); |
| 2653 if (DestTy == IceType_i1) { | 2704 if (DestTy == IceType_i1) { |
| 2654 lowerInt1Arithmetic(Inst); | 2705 lowerInt1Arithmetic(Instr); |
| 2655 return; | 2706 return; |
| 2656 } | 2707 } |
| 2657 | 2708 |
| 2658 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2709 Operand *Src0 = legalizeUndef(Instr->getSrc(0)); |
| 2659 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 2710 Operand *Src1 = legalizeUndef(Instr->getSrc(1)); |
| 2660 if (DestTy == IceType_i64) { | 2711 if (DestTy == IceType_i64) { |
| 2661 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); | 2712 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1); |
| 2662 return; | 2713 return; |
| 2663 } | 2714 } |
| 2664 | 2715 |
| 2665 if (isVectorType(DestTy)) { | 2716 if (isVectorType(DestTy)) { |
| 2666 // Add a fake def to keep liveness consistent in the meantime. | 2717 // Add a fake def to keep liveness consistent in the meantime. |
| 2667 Variable *T = makeReg(DestTy); | 2718 Variable *T = makeReg(DestTy); |
| 2668 Context.insert(InstFakeDef::create(Func, T)); | 2719 Context.insert(InstFakeDef::create(Func, T)); |
| 2669 _mov(Dest, T); | 2720 _mov(Dest, T); |
| 2670 UnimplementedError(Func->getContext()->getFlags()); | 2721 UnimplementedError(Func->getContext()->getFlags()); |
| 2671 return; | 2722 return; |
| 2672 } | 2723 } |
| 2673 | 2724 |
| 2674 // DestTy is a non-i64 scalar. | 2725 // DestTy is a non-i64 scalar. |
| 2675 Variable *T = makeReg(DestTy); | 2726 Variable *T = makeReg(DestTy); |
| 2676 | 2727 |
| 2677 // * Handle div/rem separately. They require a non-legalized Src1 to inspect | 2728 // * Handle div/rem separately. They require a non-legalized Src1 to inspect |
| 2678 // whether or not Src1 is a non-zero constant. Once legalized it is more | 2729 // whether or not Src1 is a non-zero constant. Once legalized it is more |
| 2679 // difficult to determine (constant may be moved to a register). | 2730 // difficult to determine (constant may be moved to a register). |
| 2680 // * Handle floating point arithmetic separately: it requires Src1 to be | 2731 // * Handle floating point arithmetic separately: it requires Src1 to be |
| 2681 // legalized to a register. | 2732 // legalized to a register. |
| 2682 switch (Inst->getOp()) { | 2733 switch (Instr->getOp()) { |
| 2683 default: | 2734 default: |
| 2684 break; | 2735 break; |
| 2685 case InstArithmetic::Udiv: { | 2736 case InstArithmetic::Udiv: { |
| 2686 constexpr bool NotRemainder = false; | 2737 constexpr bool NotRemainder = false; |
| 2687 Variable *Src0R = legalizeToReg(Src0); | 2738 Variable *Src0R = legalizeToReg(Src0); |
| 2688 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 2739 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
| 2689 NotRemainder); | 2740 NotRemainder); |
| 2690 return; | 2741 return; |
| 2691 } | 2742 } |
| 2692 case InstArithmetic::Sdiv: { | 2743 case InstArithmetic::Sdiv: { |
| (...skipping 18 matching lines...) |
| 2711 return; | 2762 return; |
| 2712 } | 2763 } |
| 2713 case InstArithmetic::Frem: { | 2764 case InstArithmetic::Frem: { |
| 2714 if (!isScalarFloatingType(DestTy)) { | 2765 if (!isScalarFloatingType(DestTy)) { |
| 2715 llvm::report_fatal_error("Unexpected type when lowering frem."); | 2766 llvm::report_fatal_error("Unexpected type when lowering frem."); |
| 2716 } | 2767 } |
| 2717 llvm::report_fatal_error("Frem should have already been lowered."); | 2768 llvm::report_fatal_error("Frem should have already been lowered."); |
| 2718 } | 2769 } |
| 2719 case InstArithmetic::Fadd: { | 2770 case InstArithmetic::Fadd: { |
| 2720 Variable *Src0R = legalizeToReg(Src0); | 2771 Variable *Src0R = legalizeToReg(Src0); |
| 2772 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { |
| 2773 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); |
| 2774 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); |
| 2775 _vmla(Src0R, Src1R, Src2R); |
| 2776 _mov(Dest, Src0R); |
| 2777 return; |
| 2778 } |
| 2779 |
| 2721 Variable *Src1R = legalizeToReg(Src1); | 2780 Variable *Src1R = legalizeToReg(Src1); |
| 2722 _vadd(T, Src0R, Src1R); | 2781 _vadd(T, Src0R, Src1R); |
| 2723 _mov(Dest, T); | 2782 _mov(Dest, T); |
| 2724 return; | 2783 return; |
| 2725 } | 2784 } |
| 2726 case InstArithmetic::Fsub: { | 2785 case InstArithmetic::Fsub: { |
| 2727 Variable *Src0R = legalizeToReg(Src0); | 2786 Variable *Src0R = legalizeToReg(Src0); |
| 2787 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { |
| 2788 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); |
| 2789 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); |
| 2790 _vmls(Src0R, Src1R, Src2R); |
| 2791 _mov(Dest, Src0R); |
| 2792 return; |
| 2793 } |
| 2728 Variable *Src1R = legalizeToReg(Src1); | 2794 Variable *Src1R = legalizeToReg(Src1); |
| 2729 _vsub(T, Src0R, Src1R); | 2795 _vsub(T, Src0R, Src1R); |
| 2730 _mov(Dest, T); | 2796 _mov(Dest, T); |
| 2731 return; | 2797 return; |
| 2732 } | 2798 } |
| 2733 case InstArithmetic::Fmul: { | 2799 case InstArithmetic::Fmul: { |
| 2734 Variable *Src0R = legalizeToReg(Src0); | 2800 Variable *Src0R = legalizeToReg(Src0); |
| 2735 Variable *Src1R = legalizeToReg(Src1); | 2801 Variable *Src1R = legalizeToReg(Src1); |
| 2736 _vmul(T, Src0R, Src1R); | 2802 _vmul(T, Src0R, Src1R); |
| 2737 _mov(Dest, T); | 2803 _mov(Dest, T); |
| 2738 return; | 2804 return; |
| 2739 } | 2805 } |
| 2740 case InstArithmetic::Fdiv: { | 2806 case InstArithmetic::Fdiv: { |
| 2741 Variable *Src0R = legalizeToReg(Src0); | 2807 Variable *Src0R = legalizeToReg(Src0); |
| 2742 Variable *Src1R = legalizeToReg(Src1); | 2808 Variable *Src1R = legalizeToReg(Src1); |
| 2743 _vdiv(T, Src0R, Src1R); | 2809 _vdiv(T, Src0R, Src1R); |
| 2744 _mov(Dest, T); | 2810 _mov(Dest, T); |
| 2745 return; | 2811 return; |
| 2746 } | 2812 } |
| 2747 } | 2813 } |
| 2748 | 2814 |
| 2749 // Handle everything else here. | 2815 // Handle everything else here. |
| 2750 Int32Operands Srcs(Src0, Src1); | 2816 Int32Operands Srcs(Src0, Src1); |
| 2751 switch (Inst->getOp()) { | 2817 switch (Instr->getOp()) { |
| 2752 case InstArithmetic::_num: | 2818 case InstArithmetic::_num: |
| 2753 llvm::report_fatal_error("Unknown arithmetic operator"); | 2819 llvm::report_fatal_error("Unknown arithmetic operator"); |
| 2754 return; | 2820 return; |
| 2755 case InstArithmetic::Add: { | 2821 case InstArithmetic::Add: { |
| 2822 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { |
| 2823 Variable *Src0R = legalizeToReg(Src0); |
| 2824 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); |
| 2825 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); |
| 2826 _mla(T, Src1R, Src2R, Src0R); |
| 2827 _mov(Dest, T); |
| 2828 return; |
| 2829 } |
| 2830 |
| 2756 if (Srcs.hasConstOperand()) { | 2831 if (Srcs.hasConstOperand()) { |
| 2757 if (!Srcs.immediateIsFlexEncodable() && | 2832 if (!Srcs.immediateIsFlexEncodable() && |
| 2758 Srcs.negatedImmediateIsFlexEncodable()) { | 2833 Srcs.negatedImmediateIsFlexEncodable()) { |
| 2759 Variable *Src0R = Srcs.src0R(this); | 2834 Variable *Src0R = Srcs.src0R(this); |
| 2760 Operand *Src1F = Srcs.negatedSrc1F(this); | 2835 Operand *Src1F = Srcs.negatedSrc1F(this); |
| 2761 if (!Srcs.swappedOperands()) { | 2836 if (!Srcs.swappedOperands()) { |
| 2762 _sub(T, Src0R, Src1F); | 2837 _sub(T, Src0R, Src1F); |
| 2763 } else { | 2838 } else { |
| 2764 _rsb(T, Src0R, Src1F); | 2839 _rsb(T, Src0R, Src1F); |
| 2765 } | 2840 } |
| (...skipping 32 matching lines...) |
| 2798 return; | 2873 return; |
| 2799 } | 2874 } |
| 2800 case InstArithmetic::Xor: { | 2875 case InstArithmetic::Xor: { |
| 2801 Variable *Src0R = Srcs.src0R(this); | 2876 Variable *Src0R = Srcs.src0R(this); |
| 2802 Operand *Src1RF = Srcs.src1RF(this); | 2877 Operand *Src1RF = Srcs.src1RF(this); |
| 2803 _eor(T, Src0R, Src1RF); | 2878 _eor(T, Src0R, Src1RF); |
| 2804 _mov(Dest, T); | 2879 _mov(Dest, T); |
| 2805 return; | 2880 return; |
| 2806 } | 2881 } |
| 2807 case InstArithmetic::Sub: { | 2882 case InstArithmetic::Sub: { |
| 2883 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) { |
| 2884 Variable *Src0R = legalizeToReg(Src0); |
| 2885 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0)); |
| 2886 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1)); |
| 2887 _mls(T, Src1R, Src2R, Src0R); |
| 2888 _mov(Dest, T); |
| 2889 return; |
| 2890 } |
| 2891 |
| 2808 if (Srcs.hasConstOperand()) { | 2892 if (Srcs.hasConstOperand()) { |
| 2809 if (Srcs.immediateIsFlexEncodable()) { | 2893 if (Srcs.immediateIsFlexEncodable()) { |
| 2810 Variable *Src0R = Srcs.src0R(this); | 2894 Variable *Src0R = Srcs.src0R(this); |
| 2811 Operand *Src1RF = Srcs.src1RF(this); | 2895 Operand *Src1RF = Srcs.src1RF(this); |
| 2812 if (Srcs.swappedOperands()) { | 2896 if (Srcs.swappedOperands()) { |
| 2813 _rsb(T, Src0R, Src1RF); | 2897 _rsb(T, Src0R, Src1RF); |
| 2814 } else { | 2898 } else { |
| 2815 _sub(T, Src0R, Src1RF); | 2899 _sub(T, Src0R, Src1RF); |
| 2816 } | 2900 } |
| 2817 _mov(Dest, T); | 2901 _mov(Dest, T); |
| (...skipping 188 matching lines...) |
| 3006 } | 3090 } |
| 3007 _mov(Dest, NewSrc); | 3091 _mov(Dest, NewSrc); |
| 3008 } | 3092 } |
| 3009 | 3093 |
| 3010 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( | 3094 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( |
| 3011 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, | 3095 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, |
| 3012 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { | 3096 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { |
| 3013 InstARM32Label *NewShortCircuitLabel = nullptr; | 3097 InstARM32Label *NewShortCircuitLabel = nullptr; |
| 3014 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 3098 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); |
| 3015 | 3099 |
| 3016 const Inst *Producer = BoolComputations.getProducerOf(Boolean); | 3100 const Inst *Producer = Computations.getProducerOf(Boolean); |
| 3017 | 3101 |
| 3018 if (Producer == nullptr) { | 3102 if (Producer == nullptr) { |
| 3019 // No producer, no problem: just do emit code to perform (Boolean & 1) and | 3103 // No producer, no problem: just do emit code to perform (Boolean & 1) and |
| 3020 // set the flags register. The branch should be taken if the resulting flags | 3104 // set the flags register. The branch should be taken if the resulting flags |
| 3021 // indicate a non-zero result. | 3105 // indicate a non-zero result. |
| 3022 _tst(legalizeToReg(Boolean), _1); | 3106 _tst(legalizeToReg(Boolean), _1); |
| 3023 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); | 3107 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); |
| 3024 } | 3108 } |
| 3025 | 3109 |
| 3026 switch (Producer->getKind()) { | 3110 switch (Producer->getKind()) { |
| (...skipping 200 matching lines...) |
| 3227 Variable *ReturnReg = nullptr; | 3311 Variable *ReturnReg = nullptr; |
| 3228 Variable *ReturnRegHi = nullptr; | 3312 Variable *ReturnRegHi = nullptr; |
| 3229 if (Dest) { | 3313 if (Dest) { |
| 3230 switch (Dest->getType()) { | 3314 switch (Dest->getType()) { |
| 3231 case IceType_NUM: | 3315 case IceType_NUM: |
| 3232 llvm::report_fatal_error("Invalid Call dest type"); | 3316 llvm::report_fatal_error("Invalid Call dest type"); |
| 3233 break; | 3317 break; |
| 3234 case IceType_void: | 3318 case IceType_void: |
| 3235 break; | 3319 break; |
| 3236 case IceType_i1: | 3320 case IceType_i1: |
| 3237 assert(BoolComputations.getProducerOf(Dest) == nullptr); | 3321 assert(Computations.getProducerOf(Dest) == nullptr); |
| 3238 // Fall-through intended. | 3322 // Fall-through intended. |
| 3239 case IceType_i8: | 3323 case IceType_i8: |
| 3240 case IceType_i16: | 3324 case IceType_i16: |
| 3241 case IceType_i32: | 3325 case IceType_i32: |
| 3242 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); | 3326 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); |
| 3243 break; | 3327 break; |
| 3244 case IceType_i64: | 3328 case IceType_i64: |
| 3245 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); | 3329 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); |
| 3246 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); | 3330 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); |
| 3247 break; | 3331 break; |
| (...skipping 2054 matching lines...) |
| 5302 Type Ty = Src->getType(); | 5386 Type Ty = Src->getType(); |
| 5303 Variable *Reg = makeReg(Ty, RegNum); | 5387 Variable *Reg = makeReg(Ty, RegNum); |
| 5304 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) { | 5388 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) { |
| 5305 _ldr(Reg, Mem); | 5389 _ldr(Reg, Mem); |
| 5306 } else { | 5390 } else { |
| 5307 _mov(Reg, Src); | 5391 _mov(Reg, Src); |
| 5308 } | 5392 } |
| 5309 return Reg; | 5393 return Reg; |
| 5310 } | 5394 } |
| 5311 | 5395 |
| 5396 // TODO(jpp): remove unneeded else clauses in legalize. |
| 5312 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, | 5397 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
| 5313 int32_t RegNum) { | 5398 int32_t RegNum) { |
| 5314 Type Ty = From->getType(); | 5399 Type Ty = From->getType(); |
| 5315 // Assert that a physical register is allowed. To date, all calls to | 5400 // Assert that a physical register is allowed. To date, all calls to |
| 5316 // legalize() allow a physical register. Legal_Flex converts registers to the | 5401 // legalize() allow a physical register. Legal_Flex converts registers to the |
| 5317 // right type OperandARM32FlexReg as needed. | 5402 // right type OperandARM32FlexReg as needed. |
| 5318 assert(Allowed & Legal_Reg); | 5403 assert(Allowed & Legal_Reg); |
| 5319 | 5404 |
| 5320 // Copied ipsis litteris from TargetX86Base<Machine>. | 5405 // Copied ipsis litteris from TargetX86Base<Machine>. |
| 5321 if (RegNum == Variable::NoRegister) { | 5406 if (RegNum == Variable::NoRegister) { |
| (...skipping 83 matching lines...) |
| 5405 } | 5490 } |
| 5406 | 5491 |
| 5407 if (llvm::isa<Constant>(From)) { | 5492 if (llvm::isa<Constant>(From)) { |
| 5408 if (llvm::isa<ConstantUndef>(From)) { | 5493 if (llvm::isa<ConstantUndef>(From)) { |
| 5409 From = legalizeUndef(From, RegNum); | 5494 From = legalizeUndef(From, RegNum); |
| 5410 if (isVectorType(Ty)) | 5495 if (isVectorType(Ty)) |
| 5411 return From; | 5496 return From; |
| 5412 } | 5497 } |
| 5413 // There should be no constants of vector type (other than undef). | 5498 // There should be no constants of vector type (other than undef). |
| 5414 assert(!isVectorType(Ty)); | 5499 assert(!isVectorType(Ty)); |
| 5415 bool CanBeFlex = Allowed & Legal_Flex; | |
| 5416 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { | 5500 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { |
| 5417 uint32_t RotateAmt; | 5501 uint32_t RotateAmt; |
| 5418 uint32_t Immed_8; | 5502 uint32_t Immed_8; |
| 5419 uint32_t Value = static_cast<uint32_t>(C32->getValue()); | 5503 uint32_t Value = static_cast<uint32_t>(C32->getValue()); |
| 5420 // Check if the immediate will fit in a Flexible second operand, if a | 5504 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { |
| 5421 // Flexible second operand is allowed. We need to know the exact value, | 5505 // The immediate can be encoded as a Flex immediate. We may return the |
| 5422 // so that rules out relocatable constants. Also try the inverse and use | 5506 // Flex operand if the caller has Allow'ed it. |
| 5423 // MVN if possible. | 5507 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); |
| 5424 if (CanBeFlex && | 5508 const bool CanBeFlex = Allowed & Legal_Flex; |
| 5425 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { | 5509 if (CanBeFlex) |
| 5426 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 5510 return OpF; |
| 5427 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( | 5511 return copyToReg(OpF, RegNum); |
| 5428 ~Value, &RotateAmt, &Immed_8)) { | 5512 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt, |
| 5429 auto InvertedFlex = | 5513 &Immed_8)) { |
| 5514 // Even though the immediate can't be encoded as a Flex operand, its |
| 5515 // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit |
| 5516 // constant with a single instruction. |
| 5517 auto *InvOpF = |
| 5430 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 5518 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); |
| 5431 Variable *Reg = makeReg(Ty, RegNum); | 5519 Variable *Reg = makeReg(Ty, RegNum); |
| 5432 _mvn(Reg, InvertedFlex); | 5520 _mvn(Reg, InvOpF); |
| 5433 return Reg; | 5521 return Reg; |
| 5434 } else { | 5522 } else { |
| 5435 // Do a movw/movt to a register. | 5523 // Do a movw/movt to a register. |
| 5436 Variable *Reg = makeReg(Ty, RegNum); | 5524 Variable *Reg = makeReg(Ty, RegNum); |
| 5437 uint32_t UpperBits = (Value >> 16) & 0xFFFF; | 5525 uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
| 5438 _movw(Reg, | 5526 _movw(Reg, |
| 5439 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); | 5527 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); |
| 5440 if (UpperBits != 0) { | 5528 if (UpperBits != 0) { |
| 5441 _movt(Reg, Ctx->getConstantInt32(UpperBits)); | 5529 _movt(Reg, Ctx->getConstantInt32(UpperBits)); |
| 5442 } | 5530 } |
| (...skipping 36 matching lines...) |
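For reference, an ARM flexible (modified) immediate is an 8-bit constant rotated right by an even amount; a self-contained sketch of the check that canHoldImm performs (an approximation for illustration, not Subzero's implementation) looks like:

  #include <cstdint>

  // True if Value can be encoded as an operand-2 immediate: an 8-bit constant
  // rotated right by an even number of bits (0, 2, ..., 30).
  bool fitsFlexImm(uint32_t Value, uint32_t *RotateAmt, uint32_t *Immed_8) {
    for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
      // Rotating left by Rot undoes a right rotation by Rot.
      const uint32_t V = (Value << Rot) | (Value >> ((32 - Rot) & 31));
      if (V <= 0xFF) {
        *RotateAmt = Rot;
        *Immed_8 = V;
        return true;
      }
    }
    return false;
  }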
| 5479 return copyToReg(From, RegNum); | 5567 return copyToReg(From, RegNum); |
| 5480 } | 5568 } |
| 5481 } | 5569 } |
| 5482 | 5570 |
| 5483 if (auto *Var = llvm::dyn_cast<Variable>(From)) { | 5571 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
| 5484 if (Var->isRematerializable()) { | 5572 if (Var->isRematerializable()) { |
| 5485 if (Allowed & Legal_Rematerializable) { | 5573 if (Allowed & Legal_Rematerializable) { |
| 5486 return From; | 5574 return From; |
| 5487 } | 5575 } |
| 5488 | 5576 |
| 5489 // TODO(jpp): We don't need to rematerialize Var if legalize() was invoked | |
| 5490 // for a Variable in a Mem operand. | |
| 5491 Variable *T = makeReg(Var->getType(), RegNum); | 5577 Variable *T = makeReg(Var->getType(), RegNum); |
| 5492 _mov(T, Var); | 5578 _mov(T, Var); |
| 5493 return T; | 5579 return T; |
| 5494 } | 5580 } |
| 5495 // Check if the variable is guaranteed a physical register. This can happen | 5581 // Check if the variable is guaranteed a physical register. This can happen |
| 5496 // either when the variable is pre-colored or when it is assigned infinite | 5582 // either when the variable is pre-colored or when it is assigned infinite |
| 5497 // weight. | 5583 // weight. |
| 5498 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 5584 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
| 5499 // We need a new physical register for the operand if: | 5585 // We need a new physical register for the operand if: |
| 5500 // Mem is not allowed and Var isn't guaranteed a physical | 5586 // Mem is not allowed and Var isn't guaranteed a physical |
| (...skipping 180 matching lines...) |
| 5681 | 5767 |
| 5682 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex)); | 5768 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex)); |
| 5683 if (DestHi) { | 5769 if (DestHi) { |
| 5684 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex)); | 5770 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex)); |
| 5685 } | 5771 } |
| 5686 | 5772 |
| 5687 CondWhenTrue Cond(CondARM32::kNone); | 5773 CondWhenTrue Cond(CondARM32::kNone); |
| 5688 // FlagsWereSet is used to determine whether Boolean was folded or not. If not, | 5774 // FlagsWereSet is used to determine whether Boolean was folded or not. If not, |
| 5689 // add an explicit _tst instruction below. | 5775 // add an explicit _tst instruction below. |
| 5690 bool FlagsWereSet = false; | 5776 bool FlagsWereSet = false; |
| 5691 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { | 5777 if (const Inst *Producer = Computations.getProducerOf(Boolean)) { |
| 5692 switch (Producer->getKind()) { | 5778 switch (Producer->getKind()) { |
| 5693 default: | 5779 default: |
| 5694 llvm::report_fatal_error("Unexpected producer."); | 5780 llvm::report_fatal_error("Unexpected producer."); |
| 5695 case Inst::Icmp: { | 5781 case Inst::Icmp: { |
| 5696 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); | 5782 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); |
| 5697 FlagsWereSet = true; | 5783 FlagsWereSet = true; |
| 5698 } break; | 5784 } break; |
| 5699 case Inst::Fcmp: { | 5785 case Inst::Fcmp: { |
| 5700 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); | 5786 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); |
| 5701 FlagsWereSet = true; | 5787 FlagsWereSet = true; |
| (...skipping 63 matching lines...) |
| 5765 | 5851 |
| 5766 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest, | 5852 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest, |
| 5767 Operand *Boolean) { | 5853 Operand *Boolean) { |
| 5768 assert(Boolean->getType() == IceType_i1); | 5854 assert(Boolean->getType() == IceType_i1); |
| 5769 Variable *T = makeReg(IceType_i1); | 5855 Variable *T = makeReg(IceType_i1); |
| 5770 Operand *_0 = | 5856 Operand *_0 = |
| 5771 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex); | 5857 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex); |
| 5772 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 5858 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); |
| 5773 | 5859 |
| 5774 SafeBoolChain Safe = SBC_Yes; | 5860 SafeBoolChain Safe = SBC_Yes; |
| 5775 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { | 5861 if (const Inst *Producer = Computations.getProducerOf(Boolean)) { |
| 5776 switch (Producer->getKind()) { | 5862 switch (Producer->getKind()) { |
| 5777 default: | 5863 default: |
| 5778 llvm::report_fatal_error("Unexpected producer."); | 5864 llvm::report_fatal_error("Unexpected producer."); |
| 5779 case Inst::Icmp: { | 5865 case Inst::Icmp: { |
| 5780 _mov(T, _0); | 5866 _mov(T, _0); |
| 5781 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); | 5867 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); |
| 5782 assert(Cond.WhenTrue0 != CondARM32::AL); | 5868 assert(Cond.WhenTrue0 != CondARM32::AL); |
| 5783 assert(Cond.WhenTrue0 != CondARM32::kNone); | 5869 assert(Cond.WhenTrue0 != CondARM32::kNone); |
| 5784 assert(Cond.WhenTrue1 == CondARM32::kNone); | 5870 assert(Cond.WhenTrue1 == CondARM32::kNone); |
| 5785 _mov_redefined(T, _1, Cond.WhenTrue0); | 5871 _mov_redefined(T, _1, Cond.WhenTrue0); |
| (...skipping 91 matching lines...) |
| 5877 return false; | 5963 return false; |
| 5878 case InstArithmetic::And: | 5964 case InstArithmetic::And: |
| 5879 return !isVectorType(Instr.getDest()->getType()); | 5965 return !isVectorType(Instr.getDest()->getType()); |
| 5880 case InstArithmetic::Or: | 5966 case InstArithmetic::Or: |
| 5881 return !isVectorType(Instr.getDest()->getType()); | 5967 return !isVectorType(Instr.getDest()->getType()); |
| 5882 } | 5968 } |
| 5883 } | 5969 } |
| 5884 } | 5970 } |
| 5885 } | 5971 } |
| 5886 } // end of namespace BoolFolding | 5972 } // end of namespace BoolFolding |
| 5973 |
| 5974 namespace FpFolding { |
| 5975 bool shouldTrackProducer(const Inst &Instr) { |
| 5976 switch (Instr.getKind()) { |
| 5977 default: |
| 5978 return false; |
| 5979 case Inst::Arithmetic: { |
| 5980 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { |
| 5981 default: |
| 5982 return false; |
| 5983 case InstArithmetic::Fmul: |
| 5984 return true; |
| 5985 } |
| 5986 } |
| 5987 } |
| 5988 } |
| 5989 |
| 5990 bool isValidConsumer(const Inst &Instr) { |
| 5991 switch (Instr.getKind()) { |
| 5992 default: |
| 5993 return false; |
| 5994 case Inst::Arithmetic: { |
| 5995 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { |
| 5996 default: |
| 5997 return false; |
| 5998 case InstArithmetic::Fadd: |
| 5999 case InstArithmetic::Fsub: |
| 6000 return true; |
| 6001 } |
| 6002 } |
| 6003 } |
| 6004 } |
| 6005 } // end of namespace FpFolding |
| 6006 |
| 6007 namespace IntFolding { |
| 6008 bool shouldTrackProducer(const Inst &Instr) { |
| 6009 switch (Instr.getKind()) { |
| 6010 default: |
| 6011 return false; |
| 6012 case Inst::Arithmetic: { |
| 6013 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { |
| 6014 default: |
| 6015 return false; |
| 6016 case InstArithmetic::Mul: |
| 6017 return true; |
| 6018 } |
| 6019 } |
| 6020 } |
| 6021 } |
| 6022 |
| 6023 bool isValidConsumer(const Inst &Instr) { |
| 6024 switch (Instr.getKind()) { |
| 6025 default: |
| 6026 return false; |
| 6027 case Inst::Arithmetic: { |
| 6028 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) { |
| 6029 default: |
| 6030 return false; |
| 6031 case InstArithmetic::Add: |
| 6032 case InstArithmetic::Sub: |
| 6033 return true; |
| 6034 } |
| 6035 } |
| 6036 } |
| 6037 } |
| 6038 } // end of namespace IntFolding |
| 5887 } // end of anonymous namespace | 6039 } // end of anonymous namespace |
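To illustrate the folding enabled by the FpFolding/IntFolding trackers above (a sketch of the intended lowering; register names are illustrative):

  // Subzero IR (scalar f32)            lowerArithmetic output
  //   %t = fmul float %a, %b             vmla s0, s1, s2   ; s0 += s1 * s2
  //   %d = fadd float %c, %t             mov  <%d's reg>, s0
  // Likewise, an i32 mul feeding an add/sub becomes mla/mls, and an fmul
  // feeding an fsub becomes vmls.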
| 5888 | 6040 |
| 5889 void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { | 6041 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) { |
| 5890 for (Inst &Instr : Node->getInsts()) { | 6042 for (Inst &Instr : Node->getInsts()) { |
| 5891 // Check whether Instr is a valid producer. | 6043 // Check whether Instr is a valid producer. |
| 5892 Variable *Dest = Instr.getDest(); | 6044 Variable *Dest = Instr.getDest(); |
| 5893 if (!Instr.isDeleted() // only consider non-deleted instructions; and | 6045 if (!Instr.isDeleted() // only consider non-deleted instructions; and |
| 5894 && Dest // only instructions with an actual dest var; and | 6046 && Dest // only instructions with an actual dest var; and |
| 5895 && Dest->getType() == IceType_i1 // only bool-type dest vars; and | 6047 && Dest->getType() == IceType_i1 // only bool-type dest vars; and |
| 5896 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. | 6048 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. |
| 5897 KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr)); | 6049 KnownComputations.emplace(Dest->getIndex(), |
| 6050 ComputationEntry(&Instr, IceType_i1)); |
| 6051 } |
| 6052 if (!Instr.isDeleted() // only consider non-deleted instructions; and |
| 6053 && Dest // only instructions with an actual dest var; and |
| 6054 && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and |
| 6055 && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr. |
| 6056 KnownComputations.emplace(Dest->getIndex(), |
| 6057 ComputationEntry(&Instr, Dest->getType())); |
| 6058 } |
| 6059 if (!Instr.isDeleted() // only consider non-deleted instructions; and |
| 6060 && Dest // only instructions with an actual dest var; and |
| 6061 && Dest->getType() == IceType_i32 // i32 only dest vars; and |
| 6062 && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr. |
| 6063 KnownComputations.emplace(Dest->getIndex(), |
| 6064 ComputationEntry(&Instr, IceType_i32)); |
| 5898 } | 6065 } |
| 5899 // Check each src variable against the map. | 6066 // Check each src variable against the map. |
| 5900 FOREACH_VAR_IN_INST(Var, Instr) { | 6067 FOREACH_VAR_IN_INST(Var, Instr) { |
| 5901 SizeT VarNum = Var->getIndex(); | 6068 SizeT VarNum = Var->getIndex(); |
| 5902 auto ComputationIter = KnownComputations.find(VarNum); | 6069 auto ComputationIter = KnownComputations.find(VarNum); |
| 5903 if (ComputationIter == KnownComputations.end()) { | 6070 if (ComputationIter == KnownComputations.end()) { |
| 5904 continue; | 6071 continue; |
| 5905 } | 6072 } |
| 5906 | 6073 |
| 5907 ++ComputationIter->second.NumUses; | 6074 ++ComputationIter->second.NumUses; |
| 5908 if (!BoolFolding::isValidConsumer(Instr)) { | 6075 switch (ComputationIter->second.ComputationType) { |
| 6076 default: |
| 5909 KnownComputations.erase(VarNum); | 6077 KnownComputations.erase(VarNum); |
| 5910 continue; | 6078 continue; |
| 6079 case IceType_i1: |
| 6080 if (!BoolFolding::isValidConsumer(Instr)) { |
| 6081 KnownComputations.erase(VarNum); |
| 6082 continue; |
| 6083 } |
| 6084 break; |
| 6085 case IceType_i32: |
| 6086 if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) { |
| 6087 KnownComputations.erase(VarNum); |
| 6088 continue; |
| 6089 } |
| 6090 break; |
| 6091 case IceType_f32: |
| 6092 case IceType_f64: |
| 6093 if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) { |
| 6094 KnownComputations.erase(VarNum); |
| 6095 continue; |
| 6096 } |
| 6097 break; |
| 5911 } | 6098 } |
| 5912 | 6099 |
| 5913 if (Instr.isLastUse(Var)) { | 6100 if (Instr.isLastUse(Var)) { |
| 5914 ComputationIter->second.IsLiveOut = false; | 6101 ComputationIter->second.IsLiveOut = false; |
| 5915 } | 6102 } |
| 5916 } | 6103 } |
| 5917 } | 6104 } |
| 5918 | 6105 |
| 5919 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); | 6106 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); |
| 5920 Iter != End;) { | 6107 Iter != End;) { |
| (...skipping 325 matching lines...) |
| 6246 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 6433 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
| 6247 // However, for compatibility with current NaCl LLVM, don't claim that. | 6434 // However, for compatibility with current NaCl LLVM, don't claim that. |
| 6248 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 6435 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
| 6249 } | 6436 } |
| 6250 | 6437 |
| 6251 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; | 6438 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; |
| 6252 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 6439 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
| 6253 llvm::SmallBitVector TargetARM32::ScratchRegs; | 6440 llvm::SmallBitVector TargetARM32::ScratchRegs; |
| 6254 | 6441 |
| 6255 } // end of namespace Ice | 6442 } // end of namespace Ice |