Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1481133002: Subzero. ARM32. Show FP lowering some love. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years ago
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 858 matching lines...)
869 } 869 }
870 870
871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { 871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
873 return false; 873 return false;
874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; 874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
875 ++NumGPRRegsUsed; 875 ++NumGPRRegsUsed;
876 return true; 876 return true;
877 } 877 }
878 878
879 #define SANITY_CHECK_QS(_0, _1) \
880 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
881 "ARM32 " #_0 " and " #_1 " registers are declared " \
882 "incorrectly.")
883 SANITY_CHECK_QS(q0, q1);
884 SANITY_CHECK_QS(q1, q2);
885 SANITY_CHECK_QS(q2, q3);
886 SANITY_CHECK_QS(q3, q4);
887 SANITY_CHECK_QS(q4, q5);
888 SANITY_CHECK_QS(q5, q6);
889 SANITY_CHECK_QS(q6, q7);
890 SANITY_CHECK_QS(q7, q8);
891 SANITY_CHECK_QS(q8, q9);
892 SANITY_CHECK_QS(q9, q10);
893 SANITY_CHECK_QS(q10, q11);
894 SANITY_CHECK_QS(q11, q12);
895 SANITY_CHECK_QS(q12, q13);
896 SANITY_CHECK_QS(q13, q14);
897 SANITY_CHECK_QS(q14, q15);
898 #undef SANITY_CHECK_QS
899 #define SANITY_CHECK_DS(_0, _1) \
900 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
901 "ARM32 " #_0 " and " #_1 " registers are declared " \
902 "incorrectly.")
903 SANITY_CHECK_DS(d0, d1);
904 SANITY_CHECK_DS(d1, d2);
905 SANITY_CHECK_DS(d2, d3);
906 SANITY_CHECK_DS(d3, d4);
907 SANITY_CHECK_DS(d4, d5);
908 SANITY_CHECK_DS(d5, d6);
909 SANITY_CHECK_DS(d6, d7);
910 SANITY_CHECK_DS(d7, d8);
911 SANITY_CHECK_DS(d8, d9);
912 SANITY_CHECK_DS(d9, d10);
913 SANITY_CHECK_DS(d10, d11);
914 SANITY_CHECK_DS(d11, d12);
915 SANITY_CHECK_DS(d12, d13);
916 SANITY_CHECK_DS(d13, d14);
917 SANITY_CHECK_DS(d14, d15);
918 SANITY_CHECK_DS(d15, d16);
919 SANITY_CHECK_DS(d16, d17);
920 SANITY_CHECK_DS(d17, d18);
921 SANITY_CHECK_DS(d18, d19);
922 SANITY_CHECK_DS(d19, d20);
923 SANITY_CHECK_DS(d20, d21);
924 SANITY_CHECK_DS(d21, d22);
925 SANITY_CHECK_DS(d22, d23);
926 SANITY_CHECK_DS(d23, d24);
927 SANITY_CHECK_DS(d24, d25);
928 SANITY_CHECK_DS(d25, d26);
929 SANITY_CHECK_DS(d26, d27);
930 SANITY_CHECK_DS(d27, d28);
931 SANITY_CHECK_DS(d28, d29);
932 SANITY_CHECK_DS(d29, d30);
933 SANITY_CHECK_DS(d30, d31);
934 #undef SANITY_CHECK_DS
935 #define SANITY_CHECK_SS(_0, _1) \
936 static_assert((RegARM32::Reg_##_0 + 1) == RegARM32::Reg_##_1, \
937 "ARM32 " #_0 " and " #_1 " registers are declared " \
938 "incorrectly.")
939 SANITY_CHECK_SS(s0, s1);
940 SANITY_CHECK_SS(s1, s2);
941 SANITY_CHECK_SS(s2, s3);
942 SANITY_CHECK_SS(s3, s4);
943 SANITY_CHECK_SS(s4, s5);
944 SANITY_CHECK_SS(s5, s6);
945 SANITY_CHECK_SS(s6, s7);
946 SANITY_CHECK_SS(s7, s8);
947 SANITY_CHECK_SS(s8, s9);
948 SANITY_CHECK_SS(s9, s10);
949 SANITY_CHECK_SS(s10, s11);
950 SANITY_CHECK_SS(s11, s12);
951 SANITY_CHECK_SS(s12, s13);
952 SANITY_CHECK_SS(s13, s14);
953 SANITY_CHECK_SS(s14, s15);
954 SANITY_CHECK_SS(s15, s16);
955 SANITY_CHECK_SS(s16, s17);
956 SANITY_CHECK_SS(s17, s18);
957 SANITY_CHECK_SS(s18, s19);
958 SANITY_CHECK_SS(s19, s20);
959 SANITY_CHECK_SS(s20, s21);
960 SANITY_CHECK_SS(s21, s22);
961 SANITY_CHECK_SS(s22, s23);
962 SANITY_CHECK_SS(s23, s24);
963 SANITY_CHECK_SS(s24, s25);
964 SANITY_CHECK_SS(s25, s26);
965 SANITY_CHECK_SS(s26, s27);
966 SANITY_CHECK_SS(s27, s28);
967 SANITY_CHECK_SS(s28, s29);
968 SANITY_CHECK_SS(s29, s30);
969 SANITY_CHECK_SS(s30, s31);
970 #undef SANITY_CHECK_SS
971
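The checks above only pin down the relative order of adjacent enumerators: Q and D registers are expected to be declared in descending order (Reg_q1 + 1 == Reg_q0), while S registers ascend (Reg_s0 + 1 == Reg_s1). A minimal standalone sketch of the same idea, using a made-up enum rather than the real RegARM32 declarations:

  // Hypothetical enum, for illustration only; not the real RegARM32 layout.
  namespace FakeRegs {
  enum : int { Reg_q2, Reg_q1, Reg_q0, Reg_s0, Reg_s1, Reg_s2 };
  } // end of namespace FakeRegs

  // Q (and D) registers descend, so the next enumerator is the previous one.
  static_assert(FakeRegs::Reg_q1 + 1 == FakeRegs::Reg_q0, "Q regs descend");
  static_assert(FakeRegs::Reg_q2 + 1 == FakeRegs::Reg_q1, "Q regs descend");
  // S registers ascend.
  static_assert(FakeRegs::Reg_s0 + 1 == FakeRegs::Reg_s1, "S regs ascend");
  static_assert(FakeRegs::Reg_s1 + 1 == FakeRegs::Reg_s2, "S regs ascend");

  int main() { return 0; }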
879 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { 972 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
880 if (!VFPRegsFree.any()) { 973 if (!VFPRegsFree.any()) {
881 return false; 974 return false;
882 } 975 }
883 976
884 if (isVectorType(Ty)) { 977 if (isVectorType(Ty)) {
885 // Q registers are declared in reverse order, so RegARM32::Reg_q0 > 978 // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
886 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0. 979 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0.
887 // Same thing goes for D registers. 980 // Same thing goes for D registers.
888 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
889 "ARM32 Q registers are possibly declared incorrectly.");
890 981
891 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first(); 982 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first();
892 if (QRegStart >= 0) { 983 if (QRegStart >= 0) {
893 VFPRegsFree.reset(QRegStart, QRegStart + 4); 984 VFPRegsFree.reset(QRegStart, QRegStart + 4);
894 *Reg = RegARM32::Reg_q0 - (QRegStart / 4); 985 *Reg = RegARM32::Reg_q0 - (QRegStart / 4);
895 return true; 986 return true;
896 } 987 }
897 } else if (Ty == IceType_f64) { 988 } else if (Ty == IceType_f64) {
898 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
899 "ARM32 D registers are possibly declared incorrectly.");
900
901 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first(); 989 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first();
902 if (DRegStart >= 0) { 990 if (DRegStart >= 0) {
903 VFPRegsFree.reset(DRegStart, DRegStart + 2); 991 VFPRegsFree.reset(DRegStart, DRegStart + 2);
904 *Reg = RegARM32::Reg_d0 - (DRegStart / 2); 992 *Reg = RegARM32::Reg_d0 - (DRegStart / 2);
905 return true; 993 return true;
906 } 994 }
907 } else { 995 } else {
908 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
909 "ARM32 S registers are possibly declared incorrectly.");
910
911 assert(Ty == IceType_f32); 996 assert(Ty == IceType_f32);
912 int32_t SReg = VFPRegsFree.find_first(); 997 int32_t SReg = VFPRegsFree.find_first();
913 assert(SReg >= 0); 998 assert(SReg >= 0);
914 VFPRegsFree.reset(SReg); 999 VFPRegsFree.reset(SReg);
915 *Reg = RegARM32::Reg_s0 + SReg; 1000 *Reg = RegARM32::Reg_s0 + SReg;
916 return true; 1001 return true;
917 } 1002 }
918 1003
919 // Parameter allocation failed. From now on, every fp register must be placed 1004 // Parameter allocation failed. From now on, every fp register must be placed
920 // on the stack. We clear VFPRegsFree in case there are any "holes" from S and 1005
(...skipping 168 matching lines...)
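FPInReg models the VFP argument registers as a bitmask with one bit per S register: an f32 takes one free slot, an f64 takes two consecutive slots (a D register), and a vector takes four (a Q register), with the ValidF64Regs/ValidV128Regs masks keeping the search aligned. A rough standalone sketch of the f64 path, using std::bitset instead of llvm::SmallBitVector and a made-up helper (allocD is not a Subzero function):

  #include <bitset>
  #include <cstdio>

  // Toy model: 32 S-register slots. A D register is two consecutive slots
  // starting at an even index; return the allocated D number or -1.
  static int allocD(std::bitset<32> &Free) {
    for (int Start = 0; Start < 32; Start += 2) {
      if (Free.test(Start) && Free.test(Start + 1)) {
        Free.reset(Start);
        Free.reset(Start + 1);
        // The real code maps this back to an enumerator with
        // RegARM32::Reg_d0 - (Start / 2), because the D enums descend.
        return Start / 2;
      }
    }
    return -1;
  }

  int main() {
    std::bitset<32> Free;
    Free.set();    // every S register starts out available
    Free.reset(0); // pretend s0 already went to an earlier f32 argument
    printf("next f64 argument lands in d%d\n", allocD(Free)); // prints d1
    return 0;
  }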
1089 1174
1090 // Compute the list of spilled variables and bounds for GlobalsSize, etc. 1175 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1091 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, 1176 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1092 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, 1177 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1093 &LocalsSlotsAlignmentBytes, TargetVarHook); 1178 &LocalsSlotsAlignmentBytes, TargetVarHook);
1094 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; 1179 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1095 SpillAreaSizeBytes += GlobalsSize; 1180 SpillAreaSizeBytes += GlobalsSize;
1096 1181
1097 // Add push instructions for preserved registers. On ARM, "push" can push a 1182 // Add push instructions for preserved registers. On ARM, "push" can push a
1098 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has 1183 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1099 // callee-saved float/vector registers. The "vpush" instruction can handle a 1184 // callee-saved float/vector registers.
Jim Stichnoth 2015/12/07 20:58:14 At first glance, this looks like a reflow error.
John 2015/12/08 13:54:25 Done.
1100 // whole list of float/vector registers, but it only handles contiguous 1185 // The "vpush" instruction can handle a whole list of float/vector registers,
1101 // sequences of registers by specifying the start and the length. 1186 // but it only handles contiguous sequences of registers by specifying the
1102 VarList GPRsToPreserve; 1187 // start and the length.
1103 GPRsToPreserve.reserve(CalleeSaves.size()); 1188 PreservedGPRs.reserve(CalleeSaves.size());
1104 uint32_t NumCallee = 0; 1189 PreservedSRegs.reserve(CalleeSaves.size());
1105 size_t PreservedRegsSizeBytes = 0; 1190
1106 // Consider FP and LR as callee-save / used as needed. 1191 // Consider FP and LR as callee-save / used as needed.
1107 if (UsesFramePointer) { 1192 if (UsesFramePointer) {
1193 if (RegsUsed[RegARM32::Reg_fp]) {
1194 llvm::report_fatal_error("Frame pointer has been used.");
1195 }
1108 CalleeSaves[RegARM32::Reg_fp] = true; 1196 CalleeSaves[RegARM32::Reg_fp] = true;
1109 assert(RegsUsed[RegARM32::Reg_fp] == false);
1110 RegsUsed[RegARM32::Reg_fp] = true; 1197 RegsUsed[RegARM32::Reg_fp] = true;
1111 } 1198 }
1112 if (!MaybeLeafFunc) { 1199 if (!MaybeLeafFunc) {
1113 CalleeSaves[RegARM32::Reg_lr] = true; 1200 CalleeSaves[RegARM32::Reg_lr] = true;
1114 RegsUsed[RegARM32::Reg_lr] = true; 1201 RegsUsed[RegARM32::Reg_lr] = true;
1115 } 1202 }
1203
1204 // Make two passes over the used registers. The first pass records all the
Jim Stichnoth 2015/12/07 20:58:14 "Make two passes over the used registers."
John 2015/12/08 13:54:25 Done.
1205 // used registers -- and their aliases. Then, we figure out which GPRs and
1206 // VFP S registers should be saved. We don't bother saving D/Q registers
1207 // because their uses are recorded as S register uses.
1208 llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1116 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 1209 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1117 if (RegARM32::isI64RegisterPair(i)) { 1210 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1118 // We don't save register pairs explicitly. Instead, we rely on the code 1211 // r9 is never updated in sandboxed code.
1119 // fake-defing/fake-using each register in the pair.
1120 continue; 1212 continue;
1121 } 1213 }
1122 if (CalleeSaves[i] && RegsUsed[i]) { 1214 if (CalleeSaves[i] && RegsUsed[i]) {
1123 if (NeedSandboxing && i == RegARM32::Reg_r9) { 1215 ToPreserve |= RegisterAliases[i];
1124 // r9 is never updated in sandboxed code. 1216 }
1217 }
1218
1219 uint32_t NumCallee = 0;
1220 size_t PreservedRegsSizeBytes = 0;
1221
1222 // RegClasses is an array of tuples of the form
1223 //
1224 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1225 //
1226 // We use these tuples to figure out which registers we should push/pop during
1227 // prolog/epilog.
1228 const std::tuple<uint32_t, uint32_t, VarList *> RegClasses[] = {
1229 {RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last, &PreservedGPRs},
1230 {RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last, &PreservedSRegs}};
1231 for (const auto &RegClass : RegClasses) {
1232 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1233 const uint32_t LastRegInClass = std::get<1>(RegClass);
1234 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1235 for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1236 if (!ToPreserve[Reg]) {
1125 continue; 1237 continue;
1126 } 1238 }
1127 ++NumCallee; 1239 ++NumCallee;
1128 Variable *PhysicalRegister = getPhysicalRegister(i); 1240 Variable *PhysicalRegister = getPhysicalRegister(Reg);
1129 PreservedRegsSizeBytes += 1241 PreservedRegsSizeBytes +=
1130 typeWidthInBytesOnStack(PhysicalRegister->getType()); 1242 typeWidthInBytesOnStack(PhysicalRegister->getType());
1131 GPRsToPreserve.push_back(getPhysicalRegister(i)); 1243 PreservedRegsInClass->push_back(PhysicalRegister);
1132 } 1244 }
1133 } 1245 }
1246
1134 Ctx->statsUpdateRegistersSaved(NumCallee); 1247 Ctx->statsUpdateRegistersSaved(NumCallee);
1135 if (!GPRsToPreserve.empty()) 1248 if (!PreservedSRegs.empty())
1136 _push(GPRsToPreserve); 1249 _push(PreservedSRegs);
1250 if (!PreservedGPRs.empty())
1251 _push(PreservedGPRs);
1137 1252
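The two passes above can be pictured with a toy model: pass one ORs in the alias set of every used callee-saved register (so a used D register contributes its two overlapping S registers), and pass two walks only the GPR and S-register classes, which is why D/Q registers never need to be pushed explicitly. Register numbers and alias sets below are invented for illustration:

  #include <bitset>
  #include <cstdio>
  #include <vector>

  int main() {
    // Invented register file: 0..3 act as GPRs, 4..7 as S registers, and 8..9
    // as D registers overlapping the S pairs {4,5} and {6,7}.
    constexpr int NumRegs = 10;
    std::bitset<NumRegs> Aliases[NumRegs];
    for (int R = 0; R < NumRegs; ++R)
      Aliases[R].set(R);
    Aliases[8].set(4);
    Aliases[8].set(5);
    Aliases[9].set(6);
    Aliases[9].set(7);

    std::bitset<NumRegs> CalleeSaves, RegsUsed, ToPreserve;
    for (int R = 2; R < NumRegs; ++R)
      CalleeSaves.set(R);
    RegsUsed.set(2); // a callee-saved GPR was used
    RegsUsed.set(8); // a callee-saved D register was used

    // Pass 1: a used callee-saved register marks itself and all its aliases.
    for (int R = 0; R < NumRegs; ++R)
      if (CalleeSaves.test(R) && RegsUsed.test(R))
        ToPreserve |= Aliases[R];

    // Pass 2: collect only GPRs and S registers; the used D register shows up
    // purely through its aliased S slots 4 and 5.
    std::vector<int> PreservedGPRs, PreservedSRegs;
    for (int R = 0; R <= 3; ++R)
      if (ToPreserve.test(R))
        PreservedGPRs.push_back(R);
    for (int R = 4; R <= 7; ++R)
      if (ToPreserve.test(R))
        PreservedSRegs.push_back(R);

    printf("push %zu GPR(s) and %zu S register(s)\n", PreservedGPRs.size(),
           PreservedSRegs.size()); // 1 GPR, 2 S registers
    return 0;
  }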
1138 // Generate "mov FP, SP" if needed. 1253 // Generate "mov FP, SP" if needed.
1139 if (UsesFramePointer) { 1254 if (UsesFramePointer) {
1140 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 1255 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1141 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1256 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1142 _mov(FP, SP); 1257 _mov(FP, SP);
1143 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). 1258 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1144 Context.insert(InstFakeUse::create(Func, FP)); 1259 Context.insert(InstFakeUse::create(Func, FP));
1145 } 1260 }
1146 1261
1147 // Align the variables area. SpillAreaPaddingBytes is the size of the region 1262 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1148 // after the preserved registers and before the spill areas. 1263 // after the preserved registers and before the spill areas.
1149 // LocalsSlotsPaddingBytes is the amount of padding between the globals and 1264 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1150 // locals area if they are separate. 1265 // locals area if they are separate.
1151 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); 1266 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1152 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); 1267 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1153 uint32_t SpillAreaPaddingBytes = 0; 1268 uint32_t SpillAreaPaddingBytes = 0;
1154 uint32_t LocalsSlotsPaddingBytes = 0; 1269 uint32_t LocalsSlotsPaddingBytes = 0;
1155 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, 1270 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1156 GlobalsSize, LocalsSlotsAlignmentBytes, 1271 GlobalsSize, LocalsSlotsAlignmentBytes,
1157 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); 1272 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; 1273 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1159 uint32_t GlobalsAndSubsequentPaddingSize = 1274 uint32_t GlobalsAndSubsequentPaddingSize =
1160 GlobalsSize + LocalsSlotsPaddingBytes; 1275 GlobalsSize + LocalsSlotsPaddingBytes;
1161 1276
1162 // Add the out args space to the stack, and align SP if necessary. 1277
1163 if (NeedsStackAlignment) { 1278 if (!NeedsStackAlignment) {
1279 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1280 } else {
1164 uint32_t StackOffset = PreservedRegsSizeBytes; 1281 uint32_t StackOffset = PreservedRegsSizeBytes;
1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 1282 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1166 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); 1283 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1167 SpillAreaSizeBytes = StackSize - StackOffset; 1284 SpillAreaSizeBytes = StackSize - StackOffset;
1168 } else {
1169 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1170 } 1285 }
1171 1286
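When stack alignment is required, the frame is rounded up twice: once after adding the spill area on top of the preserved registers, and again after appending the outgoing-argument area, with SpillAreaSizeBytes absorbing the padding. A small numeric sketch, assuming a 16-byte ARM32_STACK_ALIGNMENT_BYTES (an assumption for the example, not a value taken from this diff):

  #include <cstdint>
  #include <cstdio>

  // Round Size up to the assumed 16-byte stack alignment.
  static uint32_t applyStackAlignment(uint32_t Size) {
    constexpr uint32_t Align = 16; // assumed ARM32_STACK_ALIGNMENT_BYTES
    return (Size + Align - 1) & ~(Align - 1);
  }

  int main() {
    const uint32_t PreservedRegsSizeBytes = 20; // e.g. five pushed registers
    uint32_t SpillAreaSizeBytes = 36;
    const uint32_t MaxOutArgsSizeBytes = 8;

    uint32_t StackSize =
        applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes); // 64
    StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);     // 80
    SpillAreaSizeBytes = StackSize - PreservedRegsSizeBytes;              // 60

    printf("sub sp, #%u\n", (unsigned)SpillAreaSizeBytes);
    return 0;
  }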
1172 // Combine fixed alloca with SpillAreaSize. 1287 // Combine fixed alloca with SpillAreaSize.
1173 SpillAreaSizeBytes += FixedAllocaSizeBytes; 1288 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1174 1289
1175 // Generate "sub sp, SpillAreaSizeBytes" 1290 // Generate "sub sp, SpillAreaSizeBytes"
1176 if (SpillAreaSizeBytes) { 1291 if (SpillAreaSizeBytes) {
1177 // Use the scratch register if needed to legalize the immediate. 1292 // Use the scratch register if needed to legalize the immediate.
1178 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 1293 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1179 Legal_Reg | Legal_Flex, getReservedTmpReg()); 1294 Legal_Reg | Legal_Flex, getReservedTmpReg());
(...skipping 98 matching lines...)
1278 // add SP, SpillAreaSizeBytes 1393 // add SP, SpillAreaSizeBytes
1279 if (SpillAreaSizeBytes) { 1394 if (SpillAreaSizeBytes) {
1280 // Use the scratch register if needed to legalize the immediate. 1395 // Use the scratch register if needed to legalize the immediate.
1281 Operand *AddAmount = 1396 Operand *AddAmount =
1282 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 1397 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1283 Legal_Reg | Legal_Flex, getReservedTmpReg()); 1398 Legal_Reg | Legal_Flex, getReservedTmpReg());
1284 Sandboxer(this).add_sp(AddAmount); 1399 Sandboxer(this).add_sp(AddAmount);
1285 } 1400 }
1286 } 1401 }
1287 1402
1288 // Add pop instructions for preserved registers. 1403 if (!PreservedGPRs.empty())
1289 llvm::SmallBitVector CalleeSaves = 1404 _pop(PreservedGPRs);
1290 getRegisterSet(RegSet_CalleeSave, RegSet_None); 1405 if (!PreservedSRegs.empty())
1291 VarList GPRsToRestore; 1406 _pop(PreservedSRegs);
1292 GPRsToRestore.reserve(CalleeSaves.size());
1293 // Consider FP and LR as callee-save / used as needed.
1294 if (UsesFramePointer) {
1295 CalleeSaves[RegARM32::Reg_fp] = true;
1296 }
1297 if (!MaybeLeafFunc) {
1298 CalleeSaves[RegARM32::Reg_lr] = true;
1299 }
1300 // Pop registers in ascending order just like push (instead of in reverse
1301 // order).
1302 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1303 if (RegARM32::isI64RegisterPair(i)) {
1304 continue;
1305 }
1306
1307 if (CalleeSaves[i] && RegsUsed[i]) {
1308 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1309 continue;
1310 }
1311 GPRsToRestore.push_back(getPhysicalRegister(i));
1312 }
1313 }
1314 if (!GPRsToRestore.empty())
1315 _pop(GPRsToRestore);
1316 1407
1317 if (!Ctx->getFlags().getUseSandboxing()) 1408 if (!Ctx->getFlags().getUseSandboxing())
1318 return; 1409 return;
1319 1410
1320 // Change the original ret instruction into a sandboxed return sequence. 1411 // Change the original ret instruction into a sandboxed return sequence.
1412 //
1321 // bundle_lock 1413 // bundle_lock
1322 // bic lr, #0xc000000f 1414 // bic lr, #0xc000000f
1323 // bx lr 1415 // bx lr
1324 // bundle_unlock 1416 // bundle_unlock
1417 //
1325 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to 1418 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
1326 // restrict to the lower 1GB as well. 1419 // restrict to the lower 1GB as well.
1327 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); 1420 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
1328 Variable *RetValue = nullptr; 1421 Variable *RetValue = nullptr;
1329 if (RI->getSrcSize()) 1422 if (RI->getSrcSize())
1330 RetValue = llvm::cast<Variable>(RI->getSrc(0)); 1423 RetValue = llvm::cast<Variable>(RI->getSrc(0));
1331 1424
1332 Sandboxer(this).ret(LR, RetValue); 1425 Sandboxer(this).ret(LR, RetValue);
1333 1426
1334 RI->setDeleted(); 1427 RI->setDeleted();
(...skipping 1299 matching lines...)
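The 0xc000000f mask used by the sandboxed return clears the low four bits of lr, forcing a 16-byte bundle boundary (assuming getBundleAlignLog2Bytes() is 4 here), and the top two bits, which keeps the target in the low 1 GB mentioned in the comment. A quick standalone check of that bit arithmetic:

  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint32_t Mask = 0xC000000F; // bits cleared by "bic lr, lr, #0xc000000f"
    uint32_t LR = 0xD1234567;         // a deliberately bad return address

    uint32_t Sanitized = LR & ~Mask;
    printf("before: 0x%08X after: 0x%08X\n", LR, Sanitized);
    printf("16-byte aligned: %d, below 1GB: %d\n",
           (Sanitized & 0xF) == 0, Sanitized < (1u << 30));
    return 0;
  }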
2634 Operations)) { 2727 Operations)) {
2635 return false; 2728 return false;
2636 } 2729 }
2637 } 2730 }
2638 2731
2639 return Src == 0; 2732 return Src == 0;
2640 } 2733 }
2641 } // end of namespace StrengthReduction 2734 } // end of namespace StrengthReduction
2642 } // end of anonymous namespace 2735 } // end of anonymous namespace
2643 2736
2644 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 2737 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
2645 Variable *Dest = Inst->getDest(); 2738 Variable *Dest = Instr->getDest();
2646 2739
2647 if (Dest->isRematerializable()) { 2740 if (Dest->isRematerializable()) {
2648 Context.insert(InstFakeDef::create(Func, Dest)); 2741 Context.insert(InstFakeDef::create(Func, Dest));
2649 return; 2742 return;
2650 } 2743 }
2651 2744
2652 Type DestTy = Dest->getType(); 2745 Type DestTy = Dest->getType();
2653 if (DestTy == IceType_i1) { 2746 if (DestTy == IceType_i1) {
2654 lowerInt1Arithmetic(Inst); 2747 lowerInt1Arithmetic(Instr);
2655 return; 2748 return;
2656 } 2749 }
2657 2750
2658 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2751 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2659 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 2752 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2660 if (DestTy == IceType_i64) { 2753 if (DestTy == IceType_i64) {
2661 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); 2754 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
2662 return; 2755 return;
2663 } 2756 }
2664 2757
2665 if (isVectorType(DestTy)) { 2758 if (isVectorType(DestTy)) {
2666 // Add a fake def to keep liveness consistent in the meantime. 2759 // Add a fake def to keep liveness consistent in the meantime.
2667 Variable *T = makeReg(DestTy); 2760 Variable *T = makeReg(DestTy);
2668 Context.insert(InstFakeDef::create(Func, T)); 2761 Context.insert(InstFakeDef::create(Func, T));
2669 _mov(Dest, T); 2762 _mov(Dest, T);
2670 UnimplementedError(Func->getContext()->getFlags()); 2763 UnimplementedError(Func->getContext()->getFlags());
2671 return; 2764 return;
2672 } 2765 }
2673 2766
2674 // DestTy is a non-i64 scalar. 2767 // DestTy is a non-i64 scalar.
2675 Variable *T = makeReg(DestTy); 2768 Variable *T = makeReg(DestTy);
2676 2769
2677 // * Handle div/rem separately. They require a non-legalized Src1 to inspect 2770 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
2678 // whether or not Src1 is a non-zero constant. Once legalized it is more 2771 // whether or not Src1 is a non-zero constant. Once legalized it is more
2679 // difficult to determine (constant may be moved to a register). 2772 // difficult to determine (constant may be moved to a register).
2680 // * Handle floating point arithmetic separately: they require Src1 to be 2773 // * Handle floating point arithmetic separately: they require Src1 to be
2681 // legalized to a register. 2774 // legalized to a register.
2682 switch (Inst->getOp()) { 2775 switch (Instr->getOp()) {
2683 default: 2776 default:
2684 break; 2777 break;
2685 case InstArithmetic::Udiv: { 2778 case InstArithmetic::Udiv: {
2686 constexpr bool NotRemainder = false; 2779 constexpr bool NotRemainder = false;
2687 Variable *Src0R = legalizeToReg(Src0); 2780 Variable *Src0R = legalizeToReg(Src0);
2688 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 2781 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
2689 NotRemainder); 2782 NotRemainder);
2690 return; 2783 return;
2691 } 2784 }
2692 case InstArithmetic::Sdiv: { 2785 case InstArithmetic::Sdiv: {
(...skipping 18 matching lines...)
2711 return; 2804 return;
2712 } 2805 }
2713 case InstArithmetic::Frem: { 2806 case InstArithmetic::Frem: {
2714 if (!isScalarFloatingType(DestTy)) { 2807 if (!isScalarFloatingType(DestTy)) {
2715 llvm::report_fatal_error("Unexpected type when lowering frem."); 2808 llvm::report_fatal_error("Unexpected type when lowering frem.");
2716 } 2809 }
2717 llvm::report_fatal_error("Frem should have already been lowered."); 2810 llvm::report_fatal_error("Frem should have already been lowered.");
2718 } 2811 }
2719 case InstArithmetic::Fadd: { 2812 case InstArithmetic::Fadd: {
2720 Variable *Src0R = legalizeToReg(Src0); 2813 Variable *Src0R = legalizeToReg(Src0);
2814 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2815 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2816 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2817 _vmla(Src0R, Src1R, Src2R);
2818 _mov(Dest, Src0R);
2819 return;
2820 }
2821
2721 Variable *Src1R = legalizeToReg(Src1); 2822 Variable *Src1R = legalizeToReg(Src1);
2722 _vadd(T, Src0R, Src1R); 2823 _vadd(T, Src0R, Src1R);
2723 _mov(Dest, T); 2824 _mov(Dest, T);
2724 return; 2825 return;
2725 } 2826 }
2726 case InstArithmetic::Fsub: { 2827 case InstArithmetic::Fsub: {
2727 Variable *Src0R = legalizeToReg(Src0); 2828 Variable *Src0R = legalizeToReg(Src0);
2829 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2830 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2831 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2832 _vmls(Src0R, Src1R, Src2R);
2833 _mov(Dest, Src0R);
2834 return;
2835 }
2728 Variable *Src1R = legalizeToReg(Src1); 2836 Variable *Src1R = legalizeToReg(Src1);
2729 _vsub(T, Src0R, Src1R); 2837 _vsub(T, Src0R, Src1R);
2730 _mov(Dest, T); 2838 _mov(Dest, T);
2731 return; 2839 return;
2732 } 2840 }
2733 case InstArithmetic::Fmul: { 2841 case InstArithmetic::Fmul: {
2734 Variable *Src0R = legalizeToReg(Src0); 2842 Variable *Src0R = legalizeToReg(Src0);
2735 Variable *Src1R = legalizeToReg(Src1); 2843 Variable *Src1R = legalizeToReg(Src1);
2736 _vmul(T, Src0R, Src1R); 2844 _vmul(T, Src0R, Src1R);
2737 _mov(Dest, T); 2845 _mov(Dest, T);
2738 return; 2846 return;
2739 } 2847 }
2740 case InstArithmetic::Fdiv: { 2848 case InstArithmetic::Fdiv: {
2741 Variable *Src0R = legalizeToReg(Src0); 2849 Variable *Src0R = legalizeToReg(Src0);
2742 Variable *Src1R = legalizeToReg(Src1); 2850 Variable *Src1R = legalizeToReg(Src1);
2743 _vdiv(T, Src0R, Src1R); 2851 _vdiv(T, Src0R, Src1R);
2744 _mov(Dest, T); 2852 _mov(Dest, T);
2745 return; 2853 return;
2746 } 2854 }
2747 } 2855 }
2748 2856
2749 // Handle everything else here. 2857 // Handle everything else here.
2750 Int32Operands Srcs(Src0, Src1); 2858 Int32Operands Srcs(Src0, Src1);
2751 switch (Inst->getOp()) { 2859 switch (Instr->getOp()) {
2752 case InstArithmetic::_num: 2860 case InstArithmetic::_num:
2753 llvm::report_fatal_error("Unknown arithmetic operator"); 2861 llvm::report_fatal_error("Unknown arithmetic operator");
2754 return; 2862 return;
2755 case InstArithmetic::Add: { 2863 case InstArithmetic::Add: {
2864 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2865 Variable *Src0R = legalizeToReg(Src0);
2866 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2867 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2868 _mla(T, Src1R, Src2R, Src0R);
2869 _mov(Dest, T);
2870 return;
2871 }
2872
2756 if (Srcs.hasConstOperand()) { 2873 if (Srcs.hasConstOperand()) {
2757 if (!Srcs.immediateIsFlexEncodable() && 2874 if (!Srcs.immediateIsFlexEncodable() &&
2758 Srcs.negatedImmediateIsFlexEncodable()) { 2875 Srcs.negatedImmediateIsFlexEncodable()) {
2759 Variable *Src0R = Srcs.src0R(this); 2876 Variable *Src0R = Srcs.src0R(this);
2760 Operand *Src1F = Srcs.negatedSrc1F(this); 2877 Operand *Src1F = Srcs.negatedSrc1F(this);
2761 if (!Srcs.swappedOperands()) { 2878 if (!Srcs.swappedOperands()) {
2762 _sub(T, Src0R, Src1F); 2879 _sub(T, Src0R, Src1F);
2763 } else { 2880 } else {
2764 _rsb(T, Src0R, Src1F); 2881 _rsb(T, Src0R, Src1F);
2765 } 2882 }
(...skipping 32 matching lines...)
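The Fadd/Fsub cases fold a single-use fmul producer into vmla/vmls, which accumulate into their first operand; that is why Src0R doubles as the accumulator and is then moved into Dest. The integer Add case above (and the Sub case further below) does the same with mla/mls. In plain C++ terms the folded operations compute roughly the following (illustration only, not Subzero code):

  #include <cstdint>
  #include <cstdio>

  // vmla / vmls: multiply-accumulate into the first operand.
  static float mulAddF(float Acc, float A, float B) { return Acc + A * B; }
  static float mulSubF(float Acc, float A, float B) { return Acc - A * B; }
  // mla / mls: the 32-bit integer counterparts.
  static int32_t mulAddI(int32_t Acc, int32_t A, int32_t B) { return Acc + A * B; }
  static int32_t mulSubI(int32_t Acc, int32_t A, int32_t B) { return Acc - A * B; }

  int main() {
    // "t = fmul a, b; d = fadd c, t" becomes a single vmla: d = c + a * b.
    printf("%g %g %d %d\n", mulAddF(1.0f, 2.0f, 3.0f), mulSubF(1.0f, 2.0f, 3.0f),
           mulAddI(10, 2, 3), mulSubI(10, 2, 3)); // 7 -5 16 4
    return 0;
  }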
2798 return; 2915 return;
2799 } 2916 }
2800 case InstArithmetic::Xor: { 2917 case InstArithmetic::Xor: {
2801 Variable *Src0R = Srcs.src0R(this); 2918 Variable *Src0R = Srcs.src0R(this);
2802 Operand *Src1RF = Srcs.src1RF(this); 2919 Operand *Src1RF = Srcs.src1RF(this);
2803 _eor(T, Src0R, Src1RF); 2920 _eor(T, Src0R, Src1RF);
2804 _mov(Dest, T); 2921 _mov(Dest, T);
2805 return; 2922 return;
2806 } 2923 }
2807 case InstArithmetic::Sub: { 2924 case InstArithmetic::Sub: {
2925 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2926 Variable *Src0R = legalizeToReg(Src0);
2927 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2928 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2929 _mls(T, Src1R, Src2R, Src0R);
2930 _mov(Dest, T);
2931 return;
2932 }
2933
2808 if (Srcs.hasConstOperand()) { 2934 if (Srcs.hasConstOperand()) {
2809 if (Srcs.immediateIsFlexEncodable()) { 2935 if (Srcs.immediateIsFlexEncodable()) {
2810 Variable *Src0R = Srcs.src0R(this); 2936 Variable *Src0R = Srcs.src0R(this);
2811 Operand *Src1RF = Srcs.src1RF(this); 2937 Operand *Src1RF = Srcs.src1RF(this);
2812 if (Srcs.swappedOperands()) { 2938 if (Srcs.swappedOperands()) {
2813 _rsb(T, Src0R, Src1RF); 2939 _rsb(T, Src0R, Src1RF);
2814 } else { 2940 } else {
2815 _sub(T, Src0R, Src1RF); 2941 _sub(T, Src0R, Src1RF);
2816 } 2942 }
2817 _mov(Dest, T); 2943 _mov(Dest, T);
(...skipping 188 matching lines...)
3006 } 3132 }
3007 _mov(Dest, NewSrc); 3133 _mov(Dest, NewSrc);
3008 } 3134 }
3009 3135
3010 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 3136 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3011 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 3137 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3012 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 3138 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3013 InstARM32Label *NewShortCircuitLabel = nullptr; 3139 InstARM32Label *NewShortCircuitLabel = nullptr;
3014 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 3140 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3015 3141
3016 const Inst *Producer = BoolComputations.getProducerOf(Boolean); 3142 const Inst *Producer = Computations.getProducerOf(Boolean);
3017 3143
3018 if (Producer == nullptr) { 3144 if (Producer == nullptr) {
3019 // No producer, no problem: just emit code to perform (Boolean & 1) and 3145
3020 // set the flags register. The branch should be taken if the resulting flags 3146 // set the flags register. The branch should be taken if the resulting flags
3021 // indicate a non-zero result. 3147 // indicate a non-zero result.
3022 _tst(legalizeToReg(Boolean), _1); 3148 _tst(legalizeToReg(Boolean), _1);
3023 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); 3149 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3024 } 3150 }
3025 3151
3026 switch (Producer->getKind()) { 3152 switch (Producer->getKind()) {
(...skipping 200 matching lines...)
3227 Variable *ReturnReg = nullptr; 3353 Variable *ReturnReg = nullptr;
3228 Variable *ReturnRegHi = nullptr; 3354 Variable *ReturnRegHi = nullptr;
3229 if (Dest) { 3355 if (Dest) {
3230 switch (Dest->getType()) { 3356 switch (Dest->getType()) {
3231 case IceType_NUM: 3357 case IceType_NUM:
3232 llvm::report_fatal_error("Invalid Call dest type"); 3358 llvm::report_fatal_error("Invalid Call dest type");
3233 break; 3359 break;
3234 case IceType_void: 3360 case IceType_void:
3235 break; 3361 break;
3236 case IceType_i1: 3362 case IceType_i1:
3237 assert(BoolComputations.getProducerOf(Dest) == nullptr); 3363 assert(Computations.getProducerOf(Dest) == nullptr);
3238 // Fall-through intended. 3364 // Fall-through intended.
3239 case IceType_i8: 3365 case IceType_i8:
3240 case IceType_i16: 3366 case IceType_i16:
3241 case IceType_i32: 3367 case IceType_i32:
3242 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); 3368 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3243 break; 3369 break;
3244 case IceType_i64: 3370 case IceType_i64:
3245 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); 3371 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3246 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); 3372 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3247 break; 3373 break;
(...skipping 2157 matching lines...)
5405 } 5531 }
5406 5532
5407 if (llvm::isa<Constant>(From)) { 5533 if (llvm::isa<Constant>(From)) {
5408 if (llvm::isa<ConstantUndef>(From)) { 5534 if (llvm::isa<ConstantUndef>(From)) {
5409 From = legalizeUndef(From, RegNum); 5535 From = legalizeUndef(From, RegNum);
5410 if (isVectorType(Ty)) 5536 if (isVectorType(Ty))
5411 return From; 5537 return From;
5412 } 5538 }
5413 // There should be no constants of vector type (other than undef). 5539 // There should be no constants of vector type (other than undef).
5414 assert(!isVectorType(Ty)); 5540 assert(!isVectorType(Ty));
5415 bool CanBeFlex = Allowed & Legal_Flex;
5416 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { 5541 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
5417 uint32_t RotateAmt; 5542 uint32_t RotateAmt;
5418 uint32_t Immed_8; 5543 uint32_t Immed_8;
5419 uint32_t Value = static_cast<uint32_t>(C32->getValue()); 5544 uint32_t Value = static_cast<uint32_t>(C32->getValue());
5420 // Check if the immediate will fit in a Flexible second operand, if a 5545 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
5421 // Flexible second operand is allowed. We need to know the exact value, 5546 // The immediate can be encoded as a Flex immediate. We may return the
5422 // so that rules out relocatable constants. Also try the inverse and use 5547 // Flex operand if the caller has Allow'ed it.
5423 // MVN if possible. 5548 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
5424 if (CanBeFlex && 5549 const bool CanBeFlex = Allowed & Legal_Flex;
5425 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { 5550 if (CanBeFlex)
5426 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 5551 return OpF;
5427 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( 5552 return copyToReg(OpF, RegNum);
5428 ~Value, &RotateAmt, &Immed_8)) { 5553 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
Jim Stichnoth 2015/12/07 20:58:14 No "else if" after unconditional return.
John 2015/12/08 13:54:24 In my defense, this was here prior to this CL. :P
5429 auto InvertedFlex = 5554 &Immed_8)) {
5555 // Even though the immediate can't be encoded as a Flex operand, its
5556 // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
5557 // constant with a single instruction.
5558 auto *InvOpF =
5430 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 5559 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
5431 Variable *Reg = makeReg(Ty, RegNum); 5560 Variable *Reg = makeReg(Ty, RegNum);
5432 _mvn(Reg, InvertedFlex); 5561 _mvn(Reg, InvOpF);
5433 return Reg; 5562 return Reg;
5434 } else { 5563 } else {
5435 // Do a movw/movt to a register. 5564 // Do a movw/movt to a register.
5436 Variable *Reg = makeReg(Ty, RegNum); 5565 Variable *Reg = makeReg(Ty, RegNum);
5437 uint32_t UpperBits = (Value >> 16) & 0xFFFF; 5566 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
5438 _movw(Reg, 5567 _movw(Reg,
5439 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); 5568 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
5440 if (UpperBits != 0) { 5569 if (UpperBits != 0) {
5441 _movt(Reg, Ctx->getConstantInt32(UpperBits)); 5570 _movt(Reg, Ctx->getConstantInt32(UpperBits));
5442 } 5571 }
(...skipping 36 matching lines...)
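The constant path above relies on ARM's flexible immediate encoding: an 8-bit value rotated right by an even amount. If the value fits, it becomes a Flex operand; if only its bitwise inverse fits, a single mvn loads it; otherwise a movw/movt pair is emitted. A simplified standalone version of that check (it mirrors the idea of OperandARM32FlexImm::canHoldImm, not its exact implementation):

  #include <cstdint>
  #include <cstdio>

  // True if Value == imm8 ROR (2*n) for some imm8 < 256, i.e. rotating Value
  // left by some even amount yields a value that fits in 8 bits.
  static bool isFlexEncodable(uint32_t Value) {
    for (uint32_t R = 0; R < 32; R += 2) {
      uint32_t Rotated = (R == 0) ? Value : ((Value << R) | (Value >> (32 - R)));
      if ((Rotated & ~0xFFu) == 0)
        return true;
    }
    return false;
  }

  int main() {
    const uint32_t Samples[] = {0x000000FF, 0xFF000000, 0xFFFFFF00, 0x12345678};
    for (uint32_t V : Samples) {
      if (isFlexEncodable(V))
        printf("0x%08X: flex immediate\n", V);
      else if (isFlexEncodable(~V))
        printf("0x%08X: mvn of a flex immediate\n", V);
      else
        printf("0x%08X: needs movw/movt\n", V);
    }
    return 0;
  }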
5479 return copyToReg(From, RegNum); 5608 return copyToReg(From, RegNum);
5480 } 5609 }
5481 } 5610 }
5482 5611
5483 if (auto *Var = llvm::dyn_cast<Variable>(From)) { 5612 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
5484 if (Var->isRematerializable()) { 5613 if (Var->isRematerializable()) {
5485 if (Allowed & Legal_Rematerializable) { 5614 if (Allowed & Legal_Rematerializable) {
5486 return From; 5615 return From;
5487 } 5616 }
5488 5617
5489 // TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
5490 // for a Variable in a Mem operand.
5491 Variable *T = makeReg(Var->getType(), RegNum); 5618 Variable *T = makeReg(Var->getType(), RegNum);
5492 _mov(T, Var); 5619 _mov(T, Var);
5493 return T; 5620 return T;
5494 } 5621 }
5495 // Check if the variable is guaranteed a physical register. This can happen 5622 // Check if the variable is guaranteed a physical register. This can happen
5496 // either when the variable is pre-colored or when it is assigned infinite 5623 // either when the variable is pre-colored or when it is assigned infinite
5497 // weight. 5624 // weight.
5498 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 5625 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
5499 // We need a new physical register for the operand if: 5626 // We need a new physical register for the operand if:
5500 // Mem is not allowed and Var isn't guaranteed a physical 5627 // Mem is not allowed and Var isn't guaranteed a physical
(...skipping 180 matching lines...)
5681 5808
5682 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex)); 5809 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
5683 if (DestHi) { 5810 if (DestHi) {
5684 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex)); 5811 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
5685 } 5812 }
5686 5813
5687 CondWhenTrue Cond(CondARM32::kNone); 5814 CondWhenTrue Cond(CondARM32::kNone);
5688 // FlagsWereSet is used to determine whether Boolean was folded or not. If not, 5815
5689 // add an explicit _tst instruction below. 5816 // add an explicit _tst instruction below.
5690 bool FlagsWereSet = false; 5817 bool FlagsWereSet = false;
5691 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { 5818 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
5692 switch (Producer->getKind()) { 5819 switch (Producer->getKind()) {
5693 default: 5820 default:
5694 llvm::report_fatal_error("Unexpected producer."); 5821 llvm::report_fatal_error("Unexpected producer.");
5695 case Inst::Icmp: { 5822 case Inst::Icmp: {
5696 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); 5823 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
5697 FlagsWereSet = true; 5824 FlagsWereSet = true;
5698 } break; 5825 } break;
5699 case Inst::Fcmp: { 5826 case Inst::Fcmp: {
5700 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); 5827 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
5701 FlagsWereSet = true; 5828 FlagsWereSet = true;
(...skipping 63 matching lines...)
5765 5892
5766 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest, 5893 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
5767 Operand *Boolean) { 5894 Operand *Boolean) {
5768 assert(Boolean->getType() == IceType_i1); 5895 assert(Boolean->getType() == IceType_i1);
5769 Variable *T = makeReg(IceType_i1); 5896 Variable *T = makeReg(IceType_i1);
5770 Operand *_0 = 5897 Operand *_0 =
5771 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex); 5898 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
5772 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 5899 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
5773 5900
5774 SafeBoolChain Safe = SBC_Yes; 5901 SafeBoolChain Safe = SBC_Yes;
5775 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { 5902 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
5776 switch (Producer->getKind()) { 5903 switch (Producer->getKind()) {
5777 default: 5904 default:
5778 llvm::report_fatal_error("Unexpected producer."); 5905 llvm::report_fatal_error("Unexpected producer.");
5779 case Inst::Icmp: { 5906 case Inst::Icmp: {
5780 _mov(T, _0); 5907 _mov(T, _0);
5781 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); 5908 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
5782 assert(Cond.WhenTrue0 != CondARM32::AL); 5909 assert(Cond.WhenTrue0 != CondARM32::AL);
5783 assert(Cond.WhenTrue0 != CondARM32::kNone); 5910 assert(Cond.WhenTrue0 != CondARM32::kNone);
5784 assert(Cond.WhenTrue1 == CondARM32::kNone); 5911 assert(Cond.WhenTrue1 == CondARM32::kNone);
5785 _mov_redefined(T, _1, Cond.WhenTrue0); 5912 _mov_redefined(T, _1, Cond.WhenTrue0);
(...skipping 91 matching lines...)
5877 return false; 6004 return false;
5878 case InstArithmetic::And: 6005 case InstArithmetic::And:
5879 return !isVectorType(Instr.getDest()->getType()); 6006 return !isVectorType(Instr.getDest()->getType());
5880 case InstArithmetic::Or: 6007 case InstArithmetic::Or:
5881 return !isVectorType(Instr.getDest()->getType()); 6008 return !isVectorType(Instr.getDest()->getType());
5882 } 6009 }
5883 } 6010 }
5884 } 6011 }
5885 } 6012 }
5886 } // end of namespace BoolFolding 6013 } // end of namespace BoolFolding
6014
6015 namespace FpFolding {
6016 bool shouldTrackProducer(const Inst &Instr) {
6017 switch (Instr.getKind()) {
6018 default:
6019 return false;
6020 case Inst::Arithmetic: {
6021 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6022 default:
6023 return false;
6024 case InstArithmetic::Fmul:
6025 return true;
6026 }
6027 }
6028 }
6029 }
6030
6031 bool isValidConsumer(const Inst &Instr) {
6032 switch (Instr.getKind()) {
6033 default:
6034 return false;
6035 case Inst::Arithmetic: {
6036 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6037 default:
6038 return false;
6039 case InstArithmetic::Fadd:
6040 case InstArithmetic::Fsub:
6041 return true;
6042 }
6043 }
6044 }
6045 }
6046 } // end of namespace FpFolding
6047
6048 namespace IntFolding {
6049 bool shouldTrackProducer(const Inst &Instr) {
6050 switch (Instr.getKind()) {
6051 default:
6052 return false;
6053 case Inst::Arithmetic: {
6054 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6055 default:
6056 return false;
6057 case InstArithmetic::Mul:
6058 return true;
6059 }
6060 }
6061 }
6062 }
6063
6064 bool isValidConsumer(const Inst &Instr) {
6065 switch (Instr.getKind()) {
6066 default:
6067 return false;
6068 case Inst::Arithmetic: {
6069 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6070 default:
6071 return false;
6072 case InstArithmetic::Add:
6073 case InstArithmetic::Sub:
6074 return true;
6075 }
6076 }
6077 }
6078 }
6079 } // end of namespace IntFolding
5887 } // end of anonymous namespace 6080 } // end of anonymous namespace
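These folding whitelists feed the ComputationTracker below: an instruction is remembered as a producer only if it is on a producer whitelist, and it stays foldable only while every use it encounters is an acceptable consumer; the tracker also records use counts and liveness so only safely foldable values survive. A toy model of that bookkeeping, with an invented mini-IR rather than Subzero's Inst classes:

  #include <cstdio>
  #include <map>
  #include <string>
  #include <vector>

  // Invented mini-IR, for illustration only.
  struct ToyInst {
    std::string Op;                // "fmul", "fadd", "ret", ...
    std::string Dest;              // empty if the instruction produces nothing
    std::vector<std::string> Srcs;
  };

  int main() {
    // t = fmul a, b ; d = fadd c, t  -- "t" is a candidate for vmla folding.
    const std::vector<ToyInst> Block = {{"fmul", "t", {"a", "b"}},
                                        {"fadd", "d", {"c", "t"}},
                                        {"ret", "", {"d"}}};

    struct Entry {
      int NumUses = 0;
    };
    std::map<std::string, Entry> Known;

    for (const ToyInst &I : Block) {
      if (!I.Dest.empty() && I.Op == "fmul") // producer whitelist
        Known[I.Dest] = Entry();
      for (const std::string &Src : I.Srcs) {
        auto It = Known.find(Src);
        if (It == Known.end())
          continue;
        ++It->second.NumUses;
        if (I.Op != "fadd" && I.Op != "fsub") { // consumer whitelist
          Known.erase(It); // a non-foldable use kills the candidate
          continue;
        }
      }
    }

    // "t" survives with exactly one foldable use, so the fadd may emit vmla.
    for (const auto &KV : Known)
      printf("%s: %d foldable use(s)\n", KV.first.c_str(), KV.second.NumUses);
    return 0;
  }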
5888 6081
5889 void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { 6082 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
5890 for (Inst &Instr : Node->getInsts()) { 6083 for (Inst &Instr : Node->getInsts()) {
5891 // Check whether Instr is a valid producer. 6084 // Check whether Instr is a valid producer.
5892 Variable *Dest = Instr.getDest(); 6085 Variable *Dest = Instr.getDest();
5893 if (!Instr.isDeleted() // only consider non-deleted instructions; and 6086 if (!Instr.isDeleted() // only consider non-deleted instructions; and
5894 && Dest // only instructions with an actual dest var; and 6087 && Dest // only instructions with an actual dest var; and
5895 && Dest->getType() == IceType_i1 // only bool-type dest vars; and 6088 && Dest->getType() == IceType_i1 // only bool-type dest vars; and
5896 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. 6089 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
5897 KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr)); 6090 KnownComputations.emplace(Dest->getIndex(),
6091 ComputationEntry(&Instr, IceType_i1));
6092 }
6093 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6094 && Dest // only instructions with an actual dest var; and
6095 && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
6096 && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6097 KnownComputations.emplace(Dest->getIndex(),
6098 ComputationEntry(&Instr, Dest->getType()));
6099 }
6100 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6101 && Dest // only instructions with an actual dest var; and
6102 && Dest->getType() == IceType_i32 // i32 only dest vars; and
6103 && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6104 KnownComputations.emplace(Dest->getIndex(),
6105 ComputationEntry(&Instr, IceType_i32));
5898 } 6106 }
5899 // Check each src variable against the map. 6107 // Check each src variable against the map.
5900 FOREACH_VAR_IN_INST(Var, Instr) { 6108 FOREACH_VAR_IN_INST(Var, Instr) {
5901 SizeT VarNum = Var->getIndex(); 6109 SizeT VarNum = Var->getIndex();
5902 auto ComputationIter = KnownComputations.find(VarNum); 6110 auto ComputationIter = KnownComputations.find(VarNum);
5903 if (ComputationIter == KnownComputations.end()) { 6111 if (ComputationIter == KnownComputations.end()) {
5904 continue; 6112 continue;
5905 } 6113 }
5906 6114
5907 ++ComputationIter->second.NumUses; 6115 ++ComputationIter->second.NumUses;
5908 if (!BoolFolding::isValidConsumer(Instr)) { 6116 switch (ComputationIter->second.ComputationType) {
6117 default:
5909 KnownComputations.erase(VarNum); 6118 KnownComputations.erase(VarNum);
5910 continue; 6119 continue;
6120 case IceType_i1:
6121 if (!BoolFolding::isValidConsumer(Instr)) {
6122 KnownComputations.erase(VarNum);
6123 continue;
6124 }
6125 break;
6126 case IceType_i32:
6127 if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
6128 KnownComputations.erase(VarNum);
6129 continue;
6130 }
6131 break;
6132 case IceType_f32:
6133 case IceType_f64:
6134 if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
6135 KnownComputations.erase(VarNum);
6136 continue;
6137 }
6138 break;
5911 } 6139 }
5912 6140
5913 if (Instr.isLastUse(Var)) { 6141 if (Instr.isLastUse(Var)) {
5914 ComputationIter->second.IsLiveOut = false; 6142 ComputationIter->second.IsLiveOut = false;
5915 } 6143 }
5916 } 6144 }
5917 } 6145 }
5918 6146
5919 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); 6147 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
5920 Iter != End;) { 6148 Iter != End;) {
(...skipping 325 matching lines...)
6246 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 6474 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
6247 // However, for compatibility with current NaCl LLVM, don't claim that. 6475 // However, for compatibility with current NaCl LLVM, don't claim that.
6248 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 6476 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
6249 } 6477 }
6250 6478
6251 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 6479 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
6252 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 6480 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
6253 llvm::SmallBitVector TargetARM32::ScratchRegs; 6481 llvm::SmallBitVector TargetARM32::ScratchRegs;
6254 6482
6255 } // end of namespace Ice 6483 } // end of namespace Ice