Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(93)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1481133002: Subzero. ARM32. Show FP lowering some love. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Handles comments; git pull; fixes lit tests. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/assembler/arm32/mov-const.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 858 matching lines...) Expand 10 before | Expand all | Expand 10 after
869 } 869 }
870 870
871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { 871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
873 return false; 873 return false;
874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; 874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
875 ++NumGPRRegsUsed; 875 ++NumGPRRegsUsed;
876 return true; 876 return true;
877 } 877 }
878 878
879 // The calling convention helper class (TargetARM32::CallingConv) expects the
880 // following registers to be declared in a certain order, so we have these
881 // sanity checks to ensure nothing breaks unknowingly.
882 // TODO(jpp): modify the CallingConv class so it does not rely on any register
883 // declaration order.
884 #define SANITY_CHECK_QS(_0, _1) \
885 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
886 "ARM32 " #_0 " and " #_1 " registers are declared " \
887 "incorrectly.")
888 SANITY_CHECK_QS(q0, q1);
889 SANITY_CHECK_QS(q1, q2);
890 SANITY_CHECK_QS(q2, q3);
891 SANITY_CHECK_QS(q3, q4);
892 #undef SANITY_CHECK_QS
893 #define SANITY_CHECK_DS(_0, _1) \
894 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
895 "ARM32 " #_0 " and " #_1 " registers are declared " \
896 "incorrectly.")
897 SANITY_CHECK_DS(d0, d1);
898 SANITY_CHECK_DS(d1, d2);
899 SANITY_CHECK_DS(d2, d3);
900 SANITY_CHECK_DS(d3, d4);
901 SANITY_CHECK_DS(d4, d5);
902 SANITY_CHECK_DS(d5, d6);
903 SANITY_CHECK_DS(d6, d7);
904 SANITY_CHECK_DS(d7, d8);
905 #undef SANITY_CHECK_DS
906 #define SANITY_CHECK_SS(_0, _1) \
907 static_assert((RegARM32::Reg_##_0 + 1) == RegARM32::Reg_##_1, \
908 "ARM32 " #_0 " and " #_1 " registers are declared " \
909 "incorrectly.")
910 SANITY_CHECK_SS(s0, s1);
911 SANITY_CHECK_SS(s1, s2);
912 SANITY_CHECK_SS(s2, s3);
913 SANITY_CHECK_SS(s3, s4);
914 SANITY_CHECK_SS(s4, s5);
915 SANITY_CHECK_SS(s5, s6);
916 SANITY_CHECK_SS(s6, s7);
917 SANITY_CHECK_SS(s7, s8);
918 SANITY_CHECK_SS(s8, s9);
919 SANITY_CHECK_SS(s9, s10);
920 SANITY_CHECK_SS(s10, s11);
921 SANITY_CHECK_SS(s11, s12);
922 SANITY_CHECK_SS(s12, s13);
923 SANITY_CHECK_SS(s13, s14);
924 SANITY_CHECK_SS(s14, s15);
925 #undef SANITY_CHECK_SS
926
879 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { 927 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
880 if (!VFPRegsFree.any()) { 928 if (!VFPRegsFree.any()) {
881 return false; 929 return false;
882 } 930 }
883 931
884 if (isVectorType(Ty)) { 932 if (isVectorType(Ty)) {
885 // Q registers are declared in reverse order, so RegARM32::Reg_q0 > 933 // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
886 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0. 934 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0.
887 // Same thing goes for D registers. 935 // Same thing goes for D registers.
888 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
889 "ARM32 Q registers are possibly declared incorrectly.");
890
891 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first(); 936 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first();
892 if (QRegStart >= 0) { 937 if (QRegStart >= 0) {
893 VFPRegsFree.reset(QRegStart, QRegStart + 4); 938 VFPRegsFree.reset(QRegStart, QRegStart + 4);
894 *Reg = RegARM32::Reg_q0 - (QRegStart / 4); 939 *Reg = RegARM32::Reg_q0 - (QRegStart / 4);
895 return true; 940 return true;
896 } 941 }
897 } else if (Ty == IceType_f64) { 942 } else if (Ty == IceType_f64) {
898 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
899 "ARM32 D registers are possibly declared incorrectly.");
900
901 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first(); 943 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first();
902 if (DRegStart >= 0) { 944 if (DRegStart >= 0) {
903 VFPRegsFree.reset(DRegStart, DRegStart + 2); 945 VFPRegsFree.reset(DRegStart, DRegStart + 2);
904 *Reg = RegARM32::Reg_d0 - (DRegStart / 2); 946 *Reg = RegARM32::Reg_d0 - (DRegStart / 2);
905 return true; 947 return true;
906 } 948 }
907 } else { 949 } else {
908 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
909 "ARM32 S registers are possibly declared incorrectly.");
910
911 assert(Ty == IceType_f32); 950 assert(Ty == IceType_f32);
912 int32_t SReg = VFPRegsFree.find_first(); 951 int32_t SReg = VFPRegsFree.find_first();
913 assert(SReg >= 0); 952 assert(SReg >= 0);
914 VFPRegsFree.reset(SReg); 953 VFPRegsFree.reset(SReg);
915 *Reg = RegARM32::Reg_s0 + SReg; 954 *Reg = RegARM32::Reg_s0 + SReg;
916 return true; 955 return true;
917 } 956 }
918 957
919 // Parameter allocation failed. From now on, every fp register must be placed 958 // Parameter allocation failed. From now on, every fp register must be placed
920 // on the stack. We clear VFPRegsFree in case there are any "holes" from S and 959 // on the stack. We clear VFPRegsFree in case there are any "holes" from S and
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
1089 1128
1090 // Compute the list of spilled variables and bounds for GlobalsSize, etc. 1129 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1091 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, 1130 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1092 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, 1131 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1093 &LocalsSlotsAlignmentBytes, TargetVarHook); 1132 &LocalsSlotsAlignmentBytes, TargetVarHook);
1094 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; 1133 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1095 SpillAreaSizeBytes += GlobalsSize; 1134 SpillAreaSizeBytes += GlobalsSize;
1096 1135
1097 // Add push instructions for preserved registers. On ARM, "push" can push a 1136 // Add push instructions for preserved registers. On ARM, "push" can push a
1098 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has 1137 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1099 // callee-saved float/vector registers. The "vpush" instruction can handle a 1138 // callee-saved float/vector registers.
1100 // whole list of float/vector registers, but it only handles contiguous 1139 //
1101 // sequences of registers by specifying the start and the length. 1140 // The "vpush" instruction can handle a whole list of float/vector registers,
1102 VarList GPRsToPreserve; 1141 // but it only handles contiguous sequences of registers by specifying the
1103 GPRsToPreserve.reserve(CalleeSaves.size()); 1142 // start and the length.
1104 uint32_t NumCallee = 0; 1143 PreservedGPRs.reserve(CalleeSaves.size());
1105 size_t PreservedRegsSizeBytes = 0; 1144 PreservedSRegs.reserve(CalleeSaves.size());
1145
1106 // Consider FP and LR as callee-save / used as needed. 1146 // Consider FP and LR as callee-save / used as needed.
1107 if (UsesFramePointer) { 1147 if (UsesFramePointer) {
1148 if (RegsUsed[RegARM32::Reg_fp]) {
1149 llvm::report_fatal_error("Frame pointer has been used.");
1150 }
1108 CalleeSaves[RegARM32::Reg_fp] = true; 1151 CalleeSaves[RegARM32::Reg_fp] = true;
1109 assert(RegsUsed[RegARM32::Reg_fp] == false);
1110 RegsUsed[RegARM32::Reg_fp] = true; 1152 RegsUsed[RegARM32::Reg_fp] = true;
1111 } 1153 }
1112 if (!MaybeLeafFunc) { 1154 if (!MaybeLeafFunc) {
1113 CalleeSaves[RegARM32::Reg_lr] = true; 1155 CalleeSaves[RegARM32::Reg_lr] = true;
1114 RegsUsed[RegARM32::Reg_lr] = true; 1156 RegsUsed[RegARM32::Reg_lr] = true;
1115 } 1157 }
1158
1159 // Make two passes over the used registers. The first pass records all the
1160 // used registers -- and their aliases. Then, we figure out which GPRs and
1161 // VFP S registers should be saved. We don't bother saving D/Q registers
1162 // because their uses are recorded as S register uses.
1163 llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1116 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 1164 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1117 if (RegARM32::isI64RegisterPair(i)) { 1165 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1118 // We don't save register pairs explicitly. Instead, we rely on the code 1166 // r9 is never updated in sandboxed code.
1119 // fake-defing/fake-using each register in the pair.
1120 continue; 1167 continue;
1121 } 1168 }
1122 if (CalleeSaves[i] && RegsUsed[i]) { 1169 if (CalleeSaves[i] && RegsUsed[i]) {
1123 if (NeedSandboxing && i == RegARM32::Reg_r9) { 1170 ToPreserve |= RegisterAliases[i];
1124 // r9 is never updated in sandboxed code. 1171 }
1172 }
1173
1174 uint32_t NumCallee = 0;
1175 size_t PreservedRegsSizeBytes = 0;
1176
1177 // RegClasses is a tuple of
1178 //
1179 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1180 //
1181 // We use this tuple to figure out which register we should push/pop during
1182 // prolog/epilog.
1183 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1184 const RegClassType RegClasses[] = {
1185 RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
1186 &PreservedGPRs),
1187 RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
1188 &PreservedSRegs)};
1189 for (const auto &RegClass : RegClasses) {
1190 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1191 const uint32_t LastRegInClass = std::get<1>(RegClass);
1192 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1193 for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1194 if (!ToPreserve[Reg]) {
1125 continue; 1195 continue;
1126 } 1196 }
1127 ++NumCallee; 1197 ++NumCallee;
1128 Variable *PhysicalRegister = getPhysicalRegister(i); 1198 Variable *PhysicalRegister = getPhysicalRegister(Reg);
1129 PreservedRegsSizeBytes += 1199 PreservedRegsSizeBytes +=
1130 typeWidthInBytesOnStack(PhysicalRegister->getType()); 1200 typeWidthInBytesOnStack(PhysicalRegister->getType());
1131 GPRsToPreserve.push_back(getPhysicalRegister(i)); 1201 PreservedRegsInClass->push_back(PhysicalRegister);
1132 } 1202 }
1133 } 1203 }
1204
1134 Ctx->statsUpdateRegistersSaved(NumCallee); 1205 Ctx->statsUpdateRegistersSaved(NumCallee);
1135 if (!GPRsToPreserve.empty()) 1206 if (!PreservedSRegs.empty())
1136 _push(GPRsToPreserve); 1207 _push(PreservedSRegs);
1208 if (!PreservedGPRs.empty())
1209 _push(PreservedGPRs);
1137 1210
1138 // Generate "mov FP, SP" if needed. 1211 // Generate "mov FP, SP" if needed.
1139 if (UsesFramePointer) { 1212 if (UsesFramePointer) {
1140 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 1213 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1141 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1214 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1142 _mov(FP, SP); 1215 _mov(FP, SP);
1143 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). 1216 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1144 Context.insert(InstFakeUse::create(Func, FP)); 1217 Context.insert(InstFakeUse::create(Func, FP));
1145 } 1218 }
1146 1219
1147 // Align the variables area. SpillAreaPaddingBytes is the size of the region 1220 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1148 // after the preserved registers and before the spill areas. 1221 // after the preserved registers and before the spill areas.
1149 // LocalsSlotsPaddingBytes is the amount of padding between the globals and 1222 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1150 // locals area if they are separate. 1223 // locals area if they are separate.
1151 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); 1224 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1152 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); 1225 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1153 uint32_t SpillAreaPaddingBytes = 0; 1226 uint32_t SpillAreaPaddingBytes = 0;
1154 uint32_t LocalsSlotsPaddingBytes = 0; 1227 uint32_t LocalsSlotsPaddingBytes = 0;
1155 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, 1228 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1156 GlobalsSize, LocalsSlotsAlignmentBytes, 1229 GlobalsSize, LocalsSlotsAlignmentBytes,
1157 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); 1230 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; 1231 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1159 uint32_t GlobalsAndSubsequentPaddingSize = 1232 uint32_t GlobalsAndSubsequentPaddingSize =
1160 GlobalsSize + LocalsSlotsPaddingBytes; 1233 GlobalsSize + LocalsSlotsPaddingBytes;
1161 1234
1162 // Adds the out args space to the stack, and align SP if necessary. 1235 // Adds the out args space to the stack, and align SP if necessary.
1163 if (NeedsStackAlignment) { 1236 if (!NeedsStackAlignment) {
1237 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1238 } else {
1164 uint32_t StackOffset = PreservedRegsSizeBytes; 1239 uint32_t StackOffset = PreservedRegsSizeBytes;
1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 1240 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1166 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); 1241 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1167 SpillAreaSizeBytes = StackSize - StackOffset; 1242 SpillAreaSizeBytes = StackSize - StackOffset;
1168 } else {
1169 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1170 } 1243 }
1171 1244
1172 // Combine fixed alloca with SpillAreaSize. 1245 // Combine fixed alloca with SpillAreaSize.
1173 SpillAreaSizeBytes += FixedAllocaSizeBytes; 1246 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1174 1247
1175 // Generate "sub sp, SpillAreaSizeBytes" 1248 // Generate "sub sp, SpillAreaSizeBytes"
1176 if (SpillAreaSizeBytes) { 1249 if (SpillAreaSizeBytes) {
1177 // Use the scratch register if needed to legalize the immediate. 1250 // Use the scratch register if needed to legalize the immediate.
1178 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 1251 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1179 Legal_Reg | Legal_Flex, getReservedTmpReg()); 1252 Legal_Reg | Legal_Flex, getReservedTmpReg());
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
1278 // add SP, SpillAreaSizeBytes 1351 // add SP, SpillAreaSizeBytes
1279 if (SpillAreaSizeBytes) { 1352 if (SpillAreaSizeBytes) {
1280 // Use the scratch register if needed to legalize the immediate. 1353 // Use the scratch register if needed to legalize the immediate.
1281 Operand *AddAmount = 1354 Operand *AddAmount =
1282 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 1355 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1283 Legal_Reg | Legal_Flex, getReservedTmpReg()); 1356 Legal_Reg | Legal_Flex, getReservedTmpReg());
1284 Sandboxer(this).add_sp(AddAmount); 1357 Sandboxer(this).add_sp(AddAmount);
1285 } 1358 }
1286 } 1359 }
1287 1360
1288 // Add pop instructions for preserved registers. 1361 if (!PreservedGPRs.empty())
1289 llvm::SmallBitVector CalleeSaves = 1362 _pop(PreservedGPRs);
1290 getRegisterSet(RegSet_CalleeSave, RegSet_None); 1363 if (!PreservedSRegs.empty())
1291 VarList GPRsToRestore; 1364 _pop(PreservedSRegs);
1292 GPRsToRestore.reserve(CalleeSaves.size());
1293 // Consider FP and LR as callee-save / used as needed.
1294 if (UsesFramePointer) {
1295 CalleeSaves[RegARM32::Reg_fp] = true;
1296 }
1297 if (!MaybeLeafFunc) {
1298 CalleeSaves[RegARM32::Reg_lr] = true;
1299 }
1300 // Pop registers in ascending order just like push (instead of in reverse
1301 // order).
1302 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1303 if (RegARM32::isI64RegisterPair(i)) {
1304 continue;
1305 }
1306
1307 if (CalleeSaves[i] && RegsUsed[i]) {
1308 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1309 continue;
1310 }
1311 GPRsToRestore.push_back(getPhysicalRegister(i));
1312 }
1313 }
1314 if (!GPRsToRestore.empty())
1315 _pop(GPRsToRestore);
1316 1365
1317 if (!Ctx->getFlags().getUseSandboxing()) 1366 if (!Ctx->getFlags().getUseSandboxing())
1318 return; 1367 return;
1319 1368
1320 // Change the original ret instruction into a sandboxed return sequence. 1369 // Change the original ret instruction into a sandboxed return sequence.
1370 //
1321 // bundle_lock 1371 // bundle_lock
1322 // bic lr, #0xc000000f 1372 // bic lr, #0xc000000f
1323 // bx lr 1373 // bx lr
1324 // bundle_unlock 1374 // bundle_unlock
1375 //
1325 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to 1376 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
1326 // restrict to the lower 1GB as well. 1377 // restrict to the lower 1GB as well.
1327 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); 1378 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
1328 Variable *RetValue = nullptr; 1379 Variable *RetValue = nullptr;
1329 if (RI->getSrcSize()) 1380 if (RI->getSrcSize())
1330 RetValue = llvm::cast<Variable>(RI->getSrc(0)); 1381 RetValue = llvm::cast<Variable>(RI->getSrc(0));
1331 1382
1332 Sandboxer(this).ret(LR, RetValue); 1383 Sandboxer(this).ret(LR, RetValue);
1333 1384
1334 RI->setDeleted(); 1385 RI->setDeleted();
(...skipping 1299 matching lines...) Expand 10 before | Expand all | Expand 10 after
2634 Operations)) { 2685 Operations)) {
2635 return false; 2686 return false;
2636 } 2687 }
2637 } 2688 }
2638 2689
2639 return Src == 0; 2690 return Src == 0;
2640 } 2691 }
2641 } // end of namespace StrengthReduction 2692 } // end of namespace StrengthReduction
2642 } // end of anonymous namespace 2693 } // end of anonymous namespace
2643 2694
2644 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 2695 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
2645 Variable *Dest = Inst->getDest(); 2696 Variable *Dest = Instr->getDest();
2646 2697
2647 if (Dest->isRematerializable()) { 2698 if (Dest->isRematerializable()) {
2648 Context.insert(InstFakeDef::create(Func, Dest)); 2699 Context.insert(InstFakeDef::create(Func, Dest));
2649 return; 2700 return;
2650 } 2701 }
2651 2702
2652 Type DestTy = Dest->getType(); 2703 Type DestTy = Dest->getType();
2653 if (DestTy == IceType_i1) { 2704 if (DestTy == IceType_i1) {
2654 lowerInt1Arithmetic(Inst); 2705 lowerInt1Arithmetic(Instr);
2655 return; 2706 return;
2656 } 2707 }
2657 2708
2658 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2709 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2659 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 2710 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2660 if (DestTy == IceType_i64) { 2711 if (DestTy == IceType_i64) {
2661 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); 2712 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
2662 return; 2713 return;
2663 } 2714 }
2664 2715
2665 if (isVectorType(DestTy)) { 2716 if (isVectorType(DestTy)) {
2666 // Add a fake def to keep liveness consistent in the meantime. 2717 // Add a fake def to keep liveness consistent in the meantime.
2667 Variable *T = makeReg(DestTy); 2718 Variable *T = makeReg(DestTy);
2668 Context.insert(InstFakeDef::create(Func, T)); 2719 Context.insert(InstFakeDef::create(Func, T));
2669 _mov(Dest, T); 2720 _mov(Dest, T);
2670 UnimplementedError(Func->getContext()->getFlags()); 2721 UnimplementedError(Func->getContext()->getFlags());
2671 return; 2722 return;
2672 } 2723 }
2673 2724
2674 // DestTy is a non-i64 scalar. 2725 // DestTy is a non-i64 scalar.
2675 Variable *T = makeReg(DestTy); 2726 Variable *T = makeReg(DestTy);
2676 2727
2677 // * Handle div/rem separately. They require a non-legalized Src1 to inspect 2728 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
2678 // whether or not Src1 is a non-zero constant. Once legalized it is more 2729 // whether or not Src1 is a non-zero constant. Once legalized it is more
2679 // difficult to determine (constant may be moved to a register). 2730 // difficult to determine (constant may be moved to a register).
2680 // * Handle floating point arithmetic separately: they require Src1 to be 2731 // * Handle floating point arithmetic separately: they require Src1 to be
2681 // legalized to a register. 2732 // legalized to a register.
2682 switch (Inst->getOp()) { 2733 switch (Instr->getOp()) {
2683 default: 2734 default:
2684 break; 2735 break;
2685 case InstArithmetic::Udiv: { 2736 case InstArithmetic::Udiv: {
2686 constexpr bool NotRemainder = false; 2737 constexpr bool NotRemainder = false;
2687 Variable *Src0R = legalizeToReg(Src0); 2738 Variable *Src0R = legalizeToReg(Src0);
2688 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 2739 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
2689 NotRemainder); 2740 NotRemainder);
2690 return; 2741 return;
2691 } 2742 }
2692 case InstArithmetic::Sdiv: { 2743 case InstArithmetic::Sdiv: {
(...skipping 18 matching lines...) Expand all
2711 return; 2762 return;
2712 } 2763 }
2713 case InstArithmetic::Frem: { 2764 case InstArithmetic::Frem: {
2714 if (!isScalarFloatingType(DestTy)) { 2765 if (!isScalarFloatingType(DestTy)) {
2715 llvm::report_fatal_error("Unexpected type when lowering frem."); 2766 llvm::report_fatal_error("Unexpected type when lowering frem.");
2716 } 2767 }
2717 llvm::report_fatal_error("Frem should have already been lowered."); 2768 llvm::report_fatal_error("Frem should have already been lowered.");
2718 } 2769 }
2719 case InstArithmetic::Fadd: { 2770 case InstArithmetic::Fadd: {
2720 Variable *Src0R = legalizeToReg(Src0); 2771 Variable *Src0R = legalizeToReg(Src0);
2772 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2773 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2774 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2775 _vmla(Src0R, Src1R, Src2R);
2776 _mov(Dest, Src0R);
2777 return;
2778 }
2779
2721 Variable *Src1R = legalizeToReg(Src1); 2780 Variable *Src1R = legalizeToReg(Src1);
2722 _vadd(T, Src0R, Src1R); 2781 _vadd(T, Src0R, Src1R);
2723 _mov(Dest, T); 2782 _mov(Dest, T);
2724 return; 2783 return;
2725 } 2784 }
2726 case InstArithmetic::Fsub: { 2785 case InstArithmetic::Fsub: {
2727 Variable *Src0R = legalizeToReg(Src0); 2786 Variable *Src0R = legalizeToReg(Src0);
2787 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2788 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2789 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2790 _vmls(Src0R, Src1R, Src2R);
2791 _mov(Dest, Src0R);
2792 return;
2793 }
2728 Variable *Src1R = legalizeToReg(Src1); 2794 Variable *Src1R = legalizeToReg(Src1);
2729 _vsub(T, Src0R, Src1R); 2795 _vsub(T, Src0R, Src1R);
2730 _mov(Dest, T); 2796 _mov(Dest, T);
2731 return; 2797 return;
2732 } 2798 }
2733 case InstArithmetic::Fmul: { 2799 case InstArithmetic::Fmul: {
2734 Variable *Src0R = legalizeToReg(Src0); 2800 Variable *Src0R = legalizeToReg(Src0);
2735 Variable *Src1R = legalizeToReg(Src1); 2801 Variable *Src1R = legalizeToReg(Src1);
2736 _vmul(T, Src0R, Src1R); 2802 _vmul(T, Src0R, Src1R);
2737 _mov(Dest, T); 2803 _mov(Dest, T);
2738 return; 2804 return;
2739 } 2805 }
2740 case InstArithmetic::Fdiv: { 2806 case InstArithmetic::Fdiv: {
2741 Variable *Src0R = legalizeToReg(Src0); 2807 Variable *Src0R = legalizeToReg(Src0);
2742 Variable *Src1R = legalizeToReg(Src1); 2808 Variable *Src1R = legalizeToReg(Src1);
2743 _vdiv(T, Src0R, Src1R); 2809 _vdiv(T, Src0R, Src1R);
2744 _mov(Dest, T); 2810 _mov(Dest, T);
2745 return; 2811 return;
2746 } 2812 }
2747 } 2813 }
2748 2814
2749 // Handle everything else here. 2815 // Handle everything else here.
2750 Int32Operands Srcs(Src0, Src1); 2816 Int32Operands Srcs(Src0, Src1);
2751 switch (Inst->getOp()) { 2817 switch (Instr->getOp()) {
2752 case InstArithmetic::_num: 2818 case InstArithmetic::_num:
2753 llvm::report_fatal_error("Unknown arithmetic operator"); 2819 llvm::report_fatal_error("Unknown arithmetic operator");
2754 return; 2820 return;
2755 case InstArithmetic::Add: { 2821 case InstArithmetic::Add: {
2822 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2823 Variable *Src0R = legalizeToReg(Src0);
2824 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2825 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2826 _mla(T, Src1R, Src2R, Src0R);
2827 _mov(Dest, T);
2828 return;
2829 }
2830
2756 if (Srcs.hasConstOperand()) { 2831 if (Srcs.hasConstOperand()) {
2757 if (!Srcs.immediateIsFlexEncodable() && 2832 if (!Srcs.immediateIsFlexEncodable() &&
2758 Srcs.negatedImmediateIsFlexEncodable()) { 2833 Srcs.negatedImmediateIsFlexEncodable()) {
2759 Variable *Src0R = Srcs.src0R(this); 2834 Variable *Src0R = Srcs.src0R(this);
2760 Operand *Src1F = Srcs.negatedSrc1F(this); 2835 Operand *Src1F = Srcs.negatedSrc1F(this);
2761 if (!Srcs.swappedOperands()) { 2836 if (!Srcs.swappedOperands()) {
2762 _sub(T, Src0R, Src1F); 2837 _sub(T, Src0R, Src1F);
2763 } else { 2838 } else {
2764 _rsb(T, Src0R, Src1F); 2839 _rsb(T, Src0R, Src1F);
2765 } 2840 }
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
2798 return; 2873 return;
2799 } 2874 }
2800 case InstArithmetic::Xor: { 2875 case InstArithmetic::Xor: {
2801 Variable *Src0R = Srcs.src0R(this); 2876 Variable *Src0R = Srcs.src0R(this);
2802 Operand *Src1RF = Srcs.src1RF(this); 2877 Operand *Src1RF = Srcs.src1RF(this);
2803 _eor(T, Src0R, Src1RF); 2878 _eor(T, Src0R, Src1RF);
2804 _mov(Dest, T); 2879 _mov(Dest, T);
2805 return; 2880 return;
2806 } 2881 }
2807 case InstArithmetic::Sub: { 2882 case InstArithmetic::Sub: {
2883 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2884 Variable *Src0R = legalizeToReg(Src0);
2885 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2886 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2887 _mls(T, Src1R, Src2R, Src0R);
2888 _mov(Dest, T);
2889 return;
2890 }
2891
2808 if (Srcs.hasConstOperand()) { 2892 if (Srcs.hasConstOperand()) {
2809 if (Srcs.immediateIsFlexEncodable()) { 2893 if (Srcs.immediateIsFlexEncodable()) {
2810 Variable *Src0R = Srcs.src0R(this); 2894 Variable *Src0R = Srcs.src0R(this);
2811 Operand *Src1RF = Srcs.src1RF(this); 2895 Operand *Src1RF = Srcs.src1RF(this);
2812 if (Srcs.swappedOperands()) { 2896 if (Srcs.swappedOperands()) {
2813 _rsb(T, Src0R, Src1RF); 2897 _rsb(T, Src0R, Src1RF);
2814 } else { 2898 } else {
2815 _sub(T, Src0R, Src1RF); 2899 _sub(T, Src0R, Src1RF);
2816 } 2900 }
2817 _mov(Dest, T); 2901 _mov(Dest, T);
(...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after
3006 } 3090 }
3007 _mov(Dest, NewSrc); 3091 _mov(Dest, NewSrc);
3008 } 3092 }
3009 3093
3010 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 3094 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3011 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 3095 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3012 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 3096 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3013 InstARM32Label *NewShortCircuitLabel = nullptr; 3097 InstARM32Label *NewShortCircuitLabel = nullptr;
3014 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 3098 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3015 3099
3016 const Inst *Producer = BoolComputations.getProducerOf(Boolean); 3100 const Inst *Producer = Computations.getProducerOf(Boolean);
3017 3101
3018 if (Producer == nullptr) { 3102 if (Producer == nullptr) {
3019 // No producer, no problem: just emit code to perform (Boolean & 1) and 3103 // No producer, no problem: just emit code to perform (Boolean & 1) and
3020 // set the flags register. The branch should be taken if the resulting flags 3104 // set the flags register. The branch should be taken if the resulting flags
3021 // indicate a non-zero result. 3105 // indicate a non-zero result.
3022 _tst(legalizeToReg(Boolean), _1); 3106 _tst(legalizeToReg(Boolean), _1);
3023 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); 3107 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3024 } 3108 }
3025 3109
3026 switch (Producer->getKind()) { 3110 switch (Producer->getKind()) {
(...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after
3227 Variable *ReturnReg = nullptr; 3311 Variable *ReturnReg = nullptr;
3228 Variable *ReturnRegHi = nullptr; 3312 Variable *ReturnRegHi = nullptr;
3229 if (Dest) { 3313 if (Dest) {
3230 switch (Dest->getType()) { 3314 switch (Dest->getType()) {
3231 case IceType_NUM: 3315 case IceType_NUM:
3232 llvm::report_fatal_error("Invalid Call dest type"); 3316 llvm::report_fatal_error("Invalid Call dest type");
3233 break; 3317 break;
3234 case IceType_void: 3318 case IceType_void:
3235 break; 3319 break;
3236 case IceType_i1: 3320 case IceType_i1:
3237 assert(BoolComputations.getProducerOf(Dest) == nullptr); 3321 assert(Computations.getProducerOf(Dest) == nullptr);
3238 // Fall-through intended. 3322 // Fall-through intended.
3239 case IceType_i8: 3323 case IceType_i8:
3240 case IceType_i16: 3324 case IceType_i16:
3241 case IceType_i32: 3325 case IceType_i32:
3242 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); 3326 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3243 break; 3327 break;
3244 case IceType_i64: 3328 case IceType_i64:
3245 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); 3329 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3246 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); 3330 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3247 break; 3331 break;
(...skipping 2054 matching lines...) Expand 10 before | Expand all | Expand 10 after
5302 Type Ty = Src->getType(); 5386 Type Ty = Src->getType();
5303 Variable *Reg = makeReg(Ty, RegNum); 5387 Variable *Reg = makeReg(Ty, RegNum);
5304 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) { 5388 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) {
5305 _ldr(Reg, Mem); 5389 _ldr(Reg, Mem);
5306 } else { 5390 } else {
5307 _mov(Reg, Src); 5391 _mov(Reg, Src);
5308 } 5392 }
5309 return Reg; 5393 return Reg;
5310 } 5394 }
5311 5395
5396 // TODO(jpp): remove unneeded else clauses in legalize.
5312 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, 5397 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
5313 int32_t RegNum) { 5398 int32_t RegNum) {
5314 Type Ty = From->getType(); 5399 Type Ty = From->getType();
5315 // Assert that a physical register is allowed. To date, all calls to 5400 // Assert that a physical register is allowed. To date, all calls to
5316 // legalize() allow a physical register. Legal_Flex converts registers to the 5401 // legalize() allow a physical register. Legal_Flex converts registers to the
5317 // right type OperandARM32FlexReg as needed. 5402 // right type OperandARM32FlexReg as needed.
5318 assert(Allowed & Legal_Reg); 5403 assert(Allowed & Legal_Reg);
5319 5404
5320 // Copied ipsis litteris from TargetX86Base<Machine>. 5405 // Copied ipsis litteris from TargetX86Base<Machine>.
5321 if (RegNum == Variable::NoRegister) { 5406 if (RegNum == Variable::NoRegister) {
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
5405 } 5490 }
5406 5491
5407 if (llvm::isa<Constant>(From)) { 5492 if (llvm::isa<Constant>(From)) {
5408 if (llvm::isa<ConstantUndef>(From)) { 5493 if (llvm::isa<ConstantUndef>(From)) {
5409 From = legalizeUndef(From, RegNum); 5494 From = legalizeUndef(From, RegNum);
5410 if (isVectorType(Ty)) 5495 if (isVectorType(Ty))
5411 return From; 5496 return From;
5412 } 5497 }
5413 // There should be no constants of vector type (other than undef). 5498 // There should be no constants of vector type (other than undef).
5414 assert(!isVectorType(Ty)); 5499 assert(!isVectorType(Ty));
5415 bool CanBeFlex = Allowed & Legal_Flex;
5416 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { 5500 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
5417 uint32_t RotateAmt; 5501 uint32_t RotateAmt;
5418 uint32_t Immed_8; 5502 uint32_t Immed_8;
5419 uint32_t Value = static_cast<uint32_t>(C32->getValue()); 5503 uint32_t Value = static_cast<uint32_t>(C32->getValue());
5420 // Check if the immediate will fit in a Flexible second operand, if a 5504 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
5421 // Flexible second operand is allowed. We need to know the exact value, 5505 // The immediate can be encoded as a Flex immediate. We may return the
5422 // so that rules out relocatable constants. Also try the inverse and use 5506 // Flex operand if the caller has Allow'ed it.
5423 // MVN if possible. 5507 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
5424 if (CanBeFlex && 5508 const bool CanBeFlex = Allowed & Legal_Flex;
5425 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { 5509 if (CanBeFlex)
5426 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 5510 return OpF;
5427 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( 5511 return copyToReg(OpF, RegNum);
5428 ~Value, &RotateAmt, &Immed_8)) { 5512 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
5429 auto InvertedFlex = 5513 &Immed_8)) {
5514 // Even though the immediate can't be encoded as a Flex operand, its
5515 // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
5516 // constant with a single instruction.
5517 auto *InvOpF =
5430 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 5518 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
5431 Variable *Reg = makeReg(Ty, RegNum); 5519 Variable *Reg = makeReg(Ty, RegNum);
5432 _mvn(Reg, InvertedFlex); 5520 _mvn(Reg, InvOpF);
5433 return Reg; 5521 return Reg;
5434 } else { 5522 } else {
5435 // Do a movw/movt to a register. 5523 // Do a movw/movt to a register.
5436 Variable *Reg = makeReg(Ty, RegNum); 5524 Variable *Reg = makeReg(Ty, RegNum);
5437 uint32_t UpperBits = (Value >> 16) & 0xFFFF; 5525 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
5438 _movw(Reg, 5526 _movw(Reg,
5439 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); 5527 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
5440 if (UpperBits != 0) { 5528 if (UpperBits != 0) {
5441 _movt(Reg, Ctx->getConstantInt32(UpperBits)); 5529 _movt(Reg, Ctx->getConstantInt32(UpperBits));
5442 } 5530 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
5479 return copyToReg(From, RegNum); 5567 return copyToReg(From, RegNum);
5480 } 5568 }
5481 } 5569 }
5482 5570
5483 if (auto *Var = llvm::dyn_cast<Variable>(From)) { 5571 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
5484 if (Var->isRematerializable()) { 5572 if (Var->isRematerializable()) {
5485 if (Allowed & Legal_Rematerializable) { 5573 if (Allowed & Legal_Rematerializable) {
5486 return From; 5574 return From;
5487 } 5575 }
5488 5576
5489 // TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
5490 // for a Variable in a Mem operand.
5491 Variable *T = makeReg(Var->getType(), RegNum); 5577 Variable *T = makeReg(Var->getType(), RegNum);
5492 _mov(T, Var); 5578 _mov(T, Var);
5493 return T; 5579 return T;
5494 } 5580 }
5495 // Check if the variable is guaranteed a physical register. This can happen 5581 // Check if the variable is guaranteed a physical register. This can happen
5496 // either when the variable is pre-colored or when it is assigned infinite 5582 // either when the variable is pre-colored or when it is assigned infinite
5497 // weight. 5583 // weight.
5498 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 5584 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
5499 // We need a new physical register for the operand if: 5585 // We need a new physical register for the operand if:
5500 // Mem is not allowed and Var isn't guaranteed a physical 5586 // Mem is not allowed and Var isn't guaranteed a physical
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after
5681 5767
5682 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex)); 5768 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
5683 if (DestHi) { 5769 if (DestHi) {
5684 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex)); 5770 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
5685 } 5771 }
5686 5772
5687 CondWhenTrue Cond(CondARM32::kNone); 5773 CondWhenTrue Cond(CondARM32::kNone);
5688 // FlagsWereSet is used to determine whether Boolean was folded or not. If not, 5774 // FlagsWereSet is used to determine whether Boolean was folded or not. If not,
5689 // add an explicit _tst instruction below. 5775 // add an explicit _tst instruction below.
5690 bool FlagsWereSet = false; 5776 bool FlagsWereSet = false;
5691 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { 5777 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
5692 switch (Producer->getKind()) { 5778 switch (Producer->getKind()) {
5693 default: 5779 default:
5694 llvm::report_fatal_error("Unexpected producer."); 5780 llvm::report_fatal_error("Unexpected producer.");
5695 case Inst::Icmp: { 5781 case Inst::Icmp: {
5696 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); 5782 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
5697 FlagsWereSet = true; 5783 FlagsWereSet = true;
5698 } break; 5784 } break;
5699 case Inst::Fcmp: { 5785 case Inst::Fcmp: {
5700 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); 5786 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
5701 FlagsWereSet = true; 5787 FlagsWereSet = true;
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
5765 5851
5766 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest, 5852 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
5767 Operand *Boolean) { 5853 Operand *Boolean) {
5768 assert(Boolean->getType() == IceType_i1); 5854 assert(Boolean->getType() == IceType_i1);
5769 Variable *T = makeReg(IceType_i1); 5855 Variable *T = makeReg(IceType_i1);
5770 Operand *_0 = 5856 Operand *_0 =
5771 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex); 5857 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
5772 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 5858 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
5773 5859
5774 SafeBoolChain Safe = SBC_Yes; 5860 SafeBoolChain Safe = SBC_Yes;
5775 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { 5861 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
5776 switch (Producer->getKind()) { 5862 switch (Producer->getKind()) {
5777 default: 5863 default:
5778 llvm::report_fatal_error("Unexpected producer."); 5864 llvm::report_fatal_error("Unexpected producer.");
5779 case Inst::Icmp: { 5865 case Inst::Icmp: {
5780 _mov(T, _0); 5866 _mov(T, _0);
5781 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); 5867 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
5782 assert(Cond.WhenTrue0 != CondARM32::AL); 5868 assert(Cond.WhenTrue0 != CondARM32::AL);
5783 assert(Cond.WhenTrue0 != CondARM32::kNone); 5869 assert(Cond.WhenTrue0 != CondARM32::kNone);
5784 assert(Cond.WhenTrue1 == CondARM32::kNone); 5870 assert(Cond.WhenTrue1 == CondARM32::kNone);
5785 _mov_redefined(T, _1, Cond.WhenTrue0); 5871 _mov_redefined(T, _1, Cond.WhenTrue0);
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
5877 return false; 5963 return false;
5878 case InstArithmetic::And: 5964 case InstArithmetic::And:
5879 return !isVectorType(Instr.getDest()->getType()); 5965 return !isVectorType(Instr.getDest()->getType());
5880 case InstArithmetic::Or: 5966 case InstArithmetic::Or:
5881 return !isVectorType(Instr.getDest()->getType()); 5967 return !isVectorType(Instr.getDest()->getType());
5882 } 5968 }
5883 } 5969 }
5884 } 5970 }
5885 } 5971 }
5886 } // end of namespace BoolFolding 5972 } // end of namespace BoolFolding
5973
5974 namespace FpFolding {
5975 bool shouldTrackProducer(const Inst &Instr) {
5976 switch (Instr.getKind()) {
5977 default:
5978 return false;
5979 case Inst::Arithmetic: {
5980 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
5981 default:
5982 return false;
5983 case InstArithmetic::Fmul:
5984 return true;
5985 }
5986 }
5987 }
5988 }
5989
5990 bool isValidConsumer(const Inst &Instr) {
5991 switch (Instr.getKind()) {
5992 default:
5993 return false;
5994 case Inst::Arithmetic: {
5995 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
5996 default:
5997 return false;
5998 case InstArithmetic::Fadd:
5999 case InstArithmetic::Fsub:
6000 return true;
6001 }
6002 }
6003 }
6004 }
6005 } // end of namespace FpFolding
6006
6007 namespace IntFolding {
6008 bool shouldTrackProducer(const Inst &Instr) {
6009 switch (Instr.getKind()) {
6010 default:
6011 return false;
6012 case Inst::Arithmetic: {
6013 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6014 default:
6015 return false;
6016 case InstArithmetic::Mul:
6017 return true;
6018 }
6019 }
6020 }
6021 }
6022
6023 bool isValidConsumer(const Inst &Instr) {
6024 switch (Instr.getKind()) {
6025 default:
6026 return false;
6027 case Inst::Arithmetic: {
6028 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6029 default:
6030 return false;
6031 case InstArithmetic::Add:
6032 case InstArithmetic::Sub:
6033 return true;
6034 }
6035 }
6036 }
6037 }
6038 } // end of namespace IntFolding
5887 } // end of anonymous namespace 6039 } // end of anonymous namespace
5888 6040
5889 void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { 6041 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
5890 for (Inst &Instr : Node->getInsts()) { 6042 for (Inst &Instr : Node->getInsts()) {
5891 // Check whether Instr is a valid producer. 6043 // Check whether Instr is a valid producer.
5892 Variable *Dest = Instr.getDest(); 6044 Variable *Dest = Instr.getDest();
5893 if (!Instr.isDeleted() // only consider non-deleted instructions; and 6045 if (!Instr.isDeleted() // only consider non-deleted instructions; and
5894 && Dest // only instructions with an actual dest var; and 6046 && Dest // only instructions with an actual dest var; and
5895 && Dest->getType() == IceType_i1 // only bool-type dest vars; and 6047 && Dest->getType() == IceType_i1 // only bool-type dest vars; and
5896 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. 6048 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
5897 KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr)); 6049 KnownComputations.emplace(Dest->getIndex(),
6050 ComputationEntry(&Instr, IceType_i1));
6051 }
6052 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6053 && Dest // only instructions with an actual dest var; and
6054 && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
6055 && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6056 KnownComputations.emplace(Dest->getIndex(),
6057 ComputationEntry(&Instr, Dest->getType()));
6058 }
6059 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6060 && Dest // only instructions with an actual dest var; and
6061 && Dest->getType() == IceType_i32 // i32 only dest vars; and
6062 && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6063 KnownComputations.emplace(Dest->getIndex(),
6064 ComputationEntry(&Instr, IceType_i32));
5898 } 6065 }
5899 // Check each src variable against the map. 6066 // Check each src variable against the map.
5900 FOREACH_VAR_IN_INST(Var, Instr) { 6067 FOREACH_VAR_IN_INST(Var, Instr) {
5901 SizeT VarNum = Var->getIndex(); 6068 SizeT VarNum = Var->getIndex();
5902 auto ComputationIter = KnownComputations.find(VarNum); 6069 auto ComputationIter = KnownComputations.find(VarNum);
5903 if (ComputationIter == KnownComputations.end()) { 6070 if (ComputationIter == KnownComputations.end()) {
5904 continue; 6071 continue;
5905 } 6072 }
5906 6073
5907 ++ComputationIter->second.NumUses; 6074 ++ComputationIter->second.NumUses;
5908 if (!BoolFolding::isValidConsumer(Instr)) { 6075 switch (ComputationIter->second.ComputationType) {
6076 default:
5909 KnownComputations.erase(VarNum); 6077 KnownComputations.erase(VarNum);
5910 continue; 6078 continue;
6079 case IceType_i1:
6080 if (!BoolFolding::isValidConsumer(Instr)) {
6081 KnownComputations.erase(VarNum);
6082 continue;
6083 }
6084 break;
6085 case IceType_i32:
6086 if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
6087 KnownComputations.erase(VarNum);
6088 continue;
6089 }
6090 break;
6091 case IceType_f32:
6092 case IceType_f64:
6093 if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
6094 KnownComputations.erase(VarNum);
6095 continue;
6096 }
6097 break;
5911 } 6098 }
5912 6099
5913 if (Instr.isLastUse(Var)) { 6100 if (Instr.isLastUse(Var)) {
5914 ComputationIter->second.IsLiveOut = false; 6101 ComputationIter->second.IsLiveOut = false;
5915 } 6102 }
5916 } 6103 }
5917 } 6104 }
5918 6105
5919 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); 6106 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
5920 Iter != End;) { 6107 Iter != End;) {
(...skipping 325 matching lines...) Expand 10 before | Expand all | Expand 10 after
6246 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 6433 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
6247 // However, for compatibility with current NaCl LLVM, don't claim that. 6434 // However, for compatibility with current NaCl LLVM, don't claim that.
6248 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 6435 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
6249 } 6436 }
6250 6437
6251 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 6438 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
6252 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 6439 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
6253 llvm::SmallBitVector TargetARM32::ScratchRegs; 6440 llvm::SmallBitVector TargetARM32::ScratchRegs;
6254 6441
6255 } // end of namespace Ice 6442 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/assembler/arm32/mov-const.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698