Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1481133002: Subzero. ARM32. Show FP lowering some love. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years ago
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 858 matching lines...)
869 } 869 }
870 870
871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { 871 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 872 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
873 return false; 873 return false;
874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; 874 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
875 ++NumGPRRegsUsed; 875 ++NumGPRRegsUsed;
876 return true; 876 return true;
877 } 877 }
878 878
879 #define SANITY_CHECK_QS(_0, _1) \
880 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
881 "ARM32 " #_0 " and " #_1 " registers are declared " \
882 "incorrectly.")
883 SANITY_CHECK_QS(q0, q1);
884 SANITY_CHECK_QS(q1, q2);
885 SANITY_CHECK_QS(q2, q3);
886 SANITY_CHECK_QS(q3, q4);
887 SANITY_CHECK_QS(q4, q5);
888 SANITY_CHECK_QS(q5, q6);
889 SANITY_CHECK_QS(q6, q7);
890 SANITY_CHECK_QS(q7, q8);
891 SANITY_CHECK_QS(q8, q9);
892 SANITY_CHECK_QS(q9, q10);
893 SANITY_CHECK_QS(q10, q11);
894 SANITY_CHECK_QS(q11, q12);
895 SANITY_CHECK_QS(q12, q13);
896 SANITY_CHECK_QS(q13, q14);
897 SANITY_CHECK_QS(q14, q15);
898 #undef SANITY_CHECK_QS
899 #define SANITY_CHECK_DS(_0, _1) \
900 static_assert((RegARM32::Reg_##_1 + 1) == RegARM32::Reg_##_0, \
901 "ARM32 " #_0 " and " #_1 " registers are declared " \
902 "incorrectly.")
903 SANITY_CHECK_DS(d0, d1);
904 SANITY_CHECK_DS(d1, d2);
905 SANITY_CHECK_DS(d2, d3);
906 SANITY_CHECK_DS(d3, d4);
907 SANITY_CHECK_DS(d4, d5);
908 SANITY_CHECK_DS(d5, d6);
909 SANITY_CHECK_DS(d6, d7);
910 SANITY_CHECK_DS(d7, d8);
911 SANITY_CHECK_DS(d8, d9);
912 SANITY_CHECK_DS(d9, d10);
913 SANITY_CHECK_DS(d10, d11);
914 SANITY_CHECK_DS(d11, d12);
915 SANITY_CHECK_DS(d12, d13);
916 SANITY_CHECK_DS(d13, d14);
917 SANITY_CHECK_DS(d14, d15);
918 SANITY_CHECK_DS(d15, d16);
919 SANITY_CHECK_DS(d16, d17);
920 SANITY_CHECK_DS(d17, d18);
921 SANITY_CHECK_DS(d18, d19);
922 SANITY_CHECK_DS(d19, d20);
923 SANITY_CHECK_DS(d20, d21);
924 SANITY_CHECK_DS(d21, d22);
925 SANITY_CHECK_DS(d22, d23);
926 SANITY_CHECK_DS(d23, d24);
927 SANITY_CHECK_DS(d24, d25);
928 SANITY_CHECK_DS(d25, d26);
929 SANITY_CHECK_DS(d26, d27);
930 SANITY_CHECK_DS(d27, d28);
931 SANITY_CHECK_DS(d28, d29);
932 SANITY_CHECK_DS(d29, d30);
933 SANITY_CHECK_DS(d30, d31);
934 #undef SANITY_CHECK_DS
935 #define SANITY_CHECK_SS(_0, _1) \
936 static_assert((RegARM32::Reg_##_0 + 1) == RegARM32::Reg_##_1, \
937 "ARM32 " #_0 " and " #_1 " registers are declared " \
938 "incorrectly.")
939 SANITY_CHECK_SS(s0, s1);
940 SANITY_CHECK_SS(s1, s2);
941 SANITY_CHECK_SS(s2, s3);
942 SANITY_CHECK_SS(s3, s4);
943 SANITY_CHECK_SS(s4, s5);
944 SANITY_CHECK_SS(s5, s6);
945 SANITY_CHECK_SS(s6, s7);
946 SANITY_CHECK_SS(s7, s8);
947 SANITY_CHECK_SS(s8, s9);
948 SANITY_CHECK_SS(s9, s10);
949 SANITY_CHECK_SS(s10, s11);
950 SANITY_CHECK_SS(s11, s12);
951 SANITY_CHECK_SS(s12, s13);
952 SANITY_CHECK_SS(s13, s14);
953 SANITY_CHECK_SS(s14, s15);
954 SANITY_CHECK_SS(s15, s16);
955 SANITY_CHECK_SS(s16, s17);
956 SANITY_CHECK_SS(s17, s18);
957 SANITY_CHECK_SS(s18, s19);
958 SANITY_CHECK_SS(s19, s20);
959 SANITY_CHECK_SS(s20, s21);
960 SANITY_CHECK_SS(s21, s22);
961 SANITY_CHECK_SS(s22, s23);
962 SANITY_CHECK_SS(s23, s24);
963 SANITY_CHECK_SS(s24, s25);
964 SANITY_CHECK_SS(s25, s26);
965 SANITY_CHECK_SS(s26, s27);
966 SANITY_CHECK_SS(s27, s28);
967 SANITY_CHECK_SS(s28, s29);
968 SANITY_CHECK_SS(s29, s30);
969 SANITY_CHECK_SS(s30, s31);
970 #undef SANITY_CHECK_SS
971
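The checks above only pin down the relative order of adjacent enumerators: Q and D registers are expected to be declared in descending order (Reg_q1 + 1 == Reg_q0), while S registers ascend (Reg_s0 + 1 == Reg_s1). A minimal standalone sketch of the same idea, using a made-up enum rather than the real RegARM32 declarations:

  // Hypothetical enum, for illustration only; not the real RegARM32 layout.
  namespace FakeRegs {
  enum : int { Reg_q2, Reg_q1, Reg_q0, Reg_s0, Reg_s1, Reg_s2 };
  } // end of namespace FakeRegs

  // Q (and D) registers descend, so the next enumerator is the previous one.
  static_assert(FakeRegs::Reg_q1 + 1 == FakeRegs::Reg_q0, "Q regs descend");
  static_assert(FakeRegs::Reg_q2 + 1 == FakeRegs::Reg_q1, "Q regs descend");
  // S registers ascend.
  static_assert(FakeRegs::Reg_s0 + 1 == FakeRegs::Reg_s1, "S regs ascend");
  static_assert(FakeRegs::Reg_s1 + 1 == FakeRegs::Reg_s2, "S regs ascend");

  int main() { return 0; }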
879 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { 972 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
880 if (!VFPRegsFree.any()) { 973 if (!VFPRegsFree.any()) {
881 return false; 974 return false;
882 } 975 }
883 976
884 if (isVectorType(Ty)) { 977 if (isVectorType(Ty)) {
885 // Q registers are declared in reverse order, so RegARM32::Reg_q0 > 978 // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
886 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0. 979 // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0.
887 // Same thing goes for D registers. 980 // Same thing goes for D registers.
888 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
889 "ARM32 Q registers are possibly declared incorrectly.");
890 981
891 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first(); 982 int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first();
892 if (QRegStart >= 0) { 983 if (QRegStart >= 0) {
893 VFPRegsFree.reset(QRegStart, QRegStart + 4); 984 VFPRegsFree.reset(QRegStart, QRegStart + 4);
894 *Reg = RegARM32::Reg_q0 - (QRegStart / 4); 985 *Reg = RegARM32::Reg_q0 - (QRegStart / 4);
895 return true; 986 return true;
896 } 987 }
897 } else if (Ty == IceType_f64) { 988 } else if (Ty == IceType_f64) {
898 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
899 "ARM32 D registers are possibly declared incorrectly.");
900
901 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first(); 989 int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first();
902 if (DRegStart >= 0) { 990 if (DRegStart >= 0) {
903 VFPRegsFree.reset(DRegStart, DRegStart + 2); 991 VFPRegsFree.reset(DRegStart, DRegStart + 2);
904 *Reg = RegARM32::Reg_d0 - (DRegStart / 2); 992 *Reg = RegARM32::Reg_d0 - (DRegStart / 2);
905 return true; 993 return true;
906 } 994 }
907 } else { 995 } else {
908 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
909 "ARM32 S registers are possibly declared incorrectly.");
910
911 assert(Ty == IceType_f32); 996 assert(Ty == IceType_f32);
912 int32_t SReg = VFPRegsFree.find_first(); 997 int32_t SReg = VFPRegsFree.find_first();
913 assert(SReg >= 0); 998 assert(SReg >= 0);
914 VFPRegsFree.reset(SReg); 999 VFPRegsFree.reset(SReg);
915 *Reg = RegARM32::Reg_s0 + SReg; 1000 *Reg = RegARM32::Reg_s0 + SReg;
916 return true; 1001 return true;
917 } 1002 }
918 1003
919 // Parameter allocation failed. From now on, every fp register must be placed 1004 // Parameter allocation failed. From now on, every fp register must be placed
920 // on the stack. We clear VFPRegsFree in case there are any "holes" from S and 1005
(...skipping 168 matching lines...)
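FPInReg models the VFP argument registers as a bitmask with one bit per S register: an f32 takes one free slot, an f64 takes two consecutive slots (a D register), and a vector takes four (a Q register), with the ValidF64Regs/ValidV128Regs masks keeping the search aligned. A rough standalone sketch of the f64 path, using std::bitset instead of llvm::SmallBitVector and a made-up helper (allocD is not a Subzero function):

  #include <bitset>
  #include <cstdio>

  // Toy model: 32 S-register slots. A D register is two consecutive slots
  // starting at an even index; return the allocated D number or -1.
  static int allocD(std::bitset<32> &Free) {
    for (int Start = 0; Start < 32; Start += 2) {
      if (Free.test(Start) && Free.test(Start + 1)) {
        Free.reset(Start);
        Free.reset(Start + 1);
        // The real code maps this back to an enumerator with
        // RegARM32::Reg_d0 - (Start / 2), because the D enums descend.
        return Start / 2;
      }
    }
    return -1;
  }

  int main() {
    std::bitset<32> Free;
    Free.set();    // every S register starts out available
    Free.reset(0); // pretend s0 already went to an earlier f32 argument
    printf("next f64 argument lands in d%d\n", allocD(Free)); // prints d1
    return 0;
  }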
1089 1174
1090 // Compute the list of spilled variables and bounds for GlobalsSize, etc. 1175 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1091 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, 1176 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1092 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, 1177 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1093 &LocalsSlotsAlignmentBytes, TargetVarHook); 1178 &LocalsSlotsAlignmentBytes, TargetVarHook);
1094 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; 1179 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1095 SpillAreaSizeBytes += GlobalsSize; 1180 SpillAreaSizeBytes += GlobalsSize;
1096 1181
1097 // Add push instructions for preserved registers. On ARM, "push" can push a 1182 // Add push instructions for preserved registers. On ARM, "push" can push a
1098 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has 1183 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1099 // callee-saved float/vector registers. The "vpush" instruction can handle a 1184 // callee-saved float/vector registers.
Jim Stichnoth 2015/12/07 20:58:14 At first glance, this looks like a reflow error.
John 2015/12/08 13:54:25 Done.
1100 // whole list of float/vector registers, but it only handles contiguous 1185 // The "vpush" instruction can handle a whole list of float/vector registers,
1101 // sequences of registers by specifying the start and the length. 1186 // but it only handles contiguous sequences of registers by specifying the
1102 VarList GPRsToPreserve; 1187 // start and the length.
1103 GPRsToPreserve.reserve(CalleeSaves.size()); 1188 PreservedGPRs.reserve(CalleeSaves.size());
1104 uint32_t NumCallee = 0; 1189 PreservedSRegs.reserve(CalleeSaves.size());
1105 size_t PreservedRegsSizeBytes = 0; 1190
1106 // Consider FP and LR as callee-save / used as needed. 1191 // Consider FP and LR as callee-save / used as needed.
1107 if (UsesFramePointer) { 1192 if (UsesFramePointer) {
1193 if (RegsUsed[RegARM32::Reg_fp]) {
1194 llvm::report_fatal_error("Frame pointer has been used.");
1195 }
1108 CalleeSaves[RegARM32::Reg_fp] = true; 1196 CalleeSaves[RegARM32::Reg_fp] = true;
1109 assert(RegsUsed[RegARM32::Reg_fp] == false);
1110 RegsUsed[RegARM32::Reg_fp] = true; 1197 RegsUsed[RegARM32::Reg_fp] = true;
1111 } 1198 }
1112 if (!MaybeLeafFunc) { 1199 if (!MaybeLeafFunc) {
1113 CalleeSaves[RegARM32::Reg_lr] = true; 1200 CalleeSaves[RegARM32::Reg_lr] = true;
1114 RegsUsed[RegARM32::Reg_lr] = true; 1201 RegsUsed[RegARM32::Reg_lr] = true;
1115 } 1202 }
1203
1204 // Make two passes over the used registers. The first pass records all the
Jim Stichnoth 2015/12/07 20:58:14 "Make two passes over the used registers."
John 2015/12/08 13:54:25 Done.
1205 // used registers -- and their aliases. Then, we figure out which GPRs and
1206 // VFP S registers should be saved. We don't bother saving D/Q registers
1207 // because their uses are recorded as S register uses.
1208 llvm::SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1116 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 1209 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1117 if (RegARM32::isI64RegisterPair(i)) { 1210 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1118 // We don't save register pairs explicitly. Instead, we rely on the code 1211 // r9 is never updated in sandboxed code.
1119 // fake-defing/fake-using each register in the pair.
1120 continue; 1212 continue;
1121 } 1213 }
1122 if (CalleeSaves[i] && RegsUsed[i]) { 1214 if (CalleeSaves[i] && RegsUsed[i]) {
1123 if (NeedSandboxing && i == RegARM32::Reg_r9) { 1215 ToPreserve |= RegisterAliases[i];
1124 // r9 is never updated in sandboxed code. 1216 }
1217 }
1218
1219 uint32_t NumCallee = 0;
1220 size_t PreservedRegsSizeBytes = 0;
1221
1222 // RegClasses is an array of tuples of the form
1223 //
1224 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1225 //
1226 // We use these tuples to figure out which registers we should push/pop during
1227 // prolog/epilog.
1228 const std::tuple<uint32_t, uint32_t, VarList *> RegClasses[] = {
1229 {RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last, &PreservedGPRs},
1230 {RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last, &PreservedSRegs}};
1231 for (const auto &RegClass : RegClasses) {
1232 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1233 const uint32_t LastRegInClass = std::get<1>(RegClass);
1234 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1235 for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1236 if (!ToPreserve[Reg]) {
1125 continue; 1237 continue;
1126 } 1238 }
1127 ++NumCallee; 1239 ++NumCallee;
1128 Variable *PhysicalRegister = getPhysicalRegister(i); 1240 Variable *PhysicalRegister = getPhysicalRegister(Reg);
1129 PreservedRegsSizeBytes += 1241 PreservedRegsSizeBytes +=
1130 typeWidthInBytesOnStack(PhysicalRegister->getType()); 1242 typeWidthInBytesOnStack(PhysicalRegister->getType());
1131 GPRsToPreserve.push_back(getPhysicalRegister(i)); 1243 PreservedRegsInClass->push_back(PhysicalRegister);
1132 } 1244 }
1133 } 1245 }
1246
1134 Ctx->statsUpdateRegistersSaved(NumCallee); 1247 Ctx->statsUpdateRegistersSaved(NumCallee);
1135 if (!GPRsToPreserve.empty()) 1248 if (!PreservedSRegs.empty())
1136 _push(GPRsToPreserve); 1249 _push(PreservedSRegs);
1250 if (!PreservedGPRs.empty())
1251 _push(PreservedGPRs);
1137 1252
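The two passes above can be pictured with a toy model: pass one ORs in the alias set of every used callee-saved register (so a used D register contributes its two overlapping S registers), and pass two walks only the GPR and S-register classes, which is why D/Q registers never need to be pushed explicitly. Register numbers and alias sets below are invented for illustration:

  #include <bitset>
  #include <cstdio>
  #include <vector>

  int main() {
    // Invented register file: 0..3 act as GPRs, 4..7 as S registers, and 8..9
    // as D registers overlapping the S pairs {4,5} and {6,7}.
    constexpr int NumRegs = 10;
    std::bitset<NumRegs> Aliases[NumRegs];
    for (int R = 0; R < NumRegs; ++R)
      Aliases[R].set(R);
    Aliases[8].set(4);
    Aliases[8].set(5);
    Aliases[9].set(6);
    Aliases[9].set(7);

    std::bitset<NumRegs> CalleeSaves, RegsUsed, ToPreserve;
    for (int R = 2; R < NumRegs; ++R)
      CalleeSaves.set(R);
    RegsUsed.set(2); // a callee-saved GPR was used
    RegsUsed.set(8); // a callee-saved D register was used

    // Pass 1: a used callee-saved register marks itself and all its aliases.
    for (int R = 0; R < NumRegs; ++R)
      if (CalleeSaves.test(R) && RegsUsed.test(R))
        ToPreserve |= Aliases[R];

    // Pass 2: collect only GPRs and S registers; the used D register shows up
    // purely through its aliased S slots 4 and 5.
    std::vector<int> PreservedGPRs, PreservedSRegs;
    for (int R = 0; R <= 3; ++R)
      if (ToPreserve.test(R))
        PreservedGPRs.push_back(R);
    for (int R = 4; R <= 7; ++R)
      if (ToPreserve.test(R))
        PreservedSRegs.push_back(R);

    printf("push %zu GPR(s) and %zu S register(s)\n", PreservedGPRs.size(),
           PreservedSRegs.size()); // 1 GPR, 2 S registers
    return 0;
  }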
1138 // Generate "mov FP, SP" if needed. 1253 // Generate "mov FP, SP" if needed.
1139 if (UsesFramePointer) { 1254 if (UsesFramePointer) {
1140 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 1255 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1141 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1256 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1142 _mov(FP, SP); 1257 _mov(FP, SP);
1143 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). 1258 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1144 Context.insert(InstFakeUse::create(Func, FP)); 1259 Context.insert(InstFakeUse::create(Func, FP));
1145 } 1260 }
1146 1261
1147 // Align the variables area. SpillAreaPaddingBytes is the size of the region 1262 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1148 // after the preserved registers and before the spill areas. 1263 // after the preserved registers and before the spill areas.
1149 // LocalsSlotsPaddingBytes is the amount of padding between the globals and 1264 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1150 // locals area if they are separate. 1265 // locals area if they are separate.
1151 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); 1266 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1152 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); 1267 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1153 uint32_t SpillAreaPaddingBytes = 0; 1268 uint32_t SpillAreaPaddingBytes = 0;
1154 uint32_t LocalsSlotsPaddingBytes = 0; 1269 uint32_t LocalsSlotsPaddingBytes = 0;
1155 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, 1270 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1156 GlobalsSize, LocalsSlotsAlignmentBytes, 1271 GlobalsSize, LocalsSlotsAlignmentBytes,
1157 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); 1272 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; 1273 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1159 uint32_t GlobalsAndSubsequentPaddingSize = 1274 uint32_t GlobalsAndSubsequentPaddingSize =
1160 GlobalsSize + LocalsSlotsPaddingBytes; 1275 GlobalsSize + LocalsSlotsPaddingBytes;
1161 1276
1162 // Add the out args space to the stack, and align SP if necessary. 1277
1163 if (NeedsStackAlignment) { 1278 if (!NeedsStackAlignment) {
1279 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1280 } else {
1164 uint32_t StackOffset = PreservedRegsSizeBytes; 1281 uint32_t StackOffset = PreservedRegsSizeBytes;
1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 1282 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1166 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); 1283 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1167 SpillAreaSizeBytes = StackSize - StackOffset; 1284 SpillAreaSizeBytes = StackSize - StackOffset;
1168 } else {
1169 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1170 } 1285 }
1171 1286
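When stack alignment is required, the frame is rounded up twice: once after adding the spill area on top of the preserved registers, and again after appending the outgoing-argument area, with SpillAreaSizeBytes absorbing the padding. A small numeric sketch, assuming a 16-byte ARM32_STACK_ALIGNMENT_BYTES (an assumption for the example, not a value taken from this diff):

  #include <cstdint>
  #include <cstdio>

  // Round Size up to the assumed 16-byte stack alignment.
  static uint32_t applyStackAlignment(uint32_t Size) {
    constexpr uint32_t Align = 16; // assumed ARM32_STACK_ALIGNMENT_BYTES
    return (Size + Align - 1) & ~(Align - 1);
  }

  int main() {
    const uint32_t PreservedRegsSizeBytes = 20; // e.g. five pushed registers
    uint32_t SpillAreaSizeBytes = 36;
    const uint32_t MaxOutArgsSizeBytes = 8;

    uint32_t StackSize =
        applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes); // 64
    StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);     // 80
    SpillAreaSizeBytes = StackSize - PreservedRegsSizeBytes;              // 60

    printf("sub sp, #%u\n", (unsigned)SpillAreaSizeBytes);
    return 0;
  }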
1172 // Combine fixed alloca with SpillAreaSize. 1287 // Combine fixed alloca with SpillAreaSize.
1173 SpillAreaSizeBytes += FixedAllocaSizeBytes; 1288 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1174 1289
1175 // Generate "sub sp, SpillAreaSizeBytes" 1290 // Generate "sub sp, SpillAreaSizeBytes"
1176 if (SpillAreaSizeBytes) { 1291 if (SpillAreaSizeBytes) {
1177 // Use the scratch register if needed to legalize the immediate. 1292 // Use the scratch register if needed to legalize the immediate.
1178 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 1293 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1179 Legal_Reg | Legal_Flex, getReservedTmpReg()); 1294 Legal_Reg | Legal_Flex, getReservedTmpReg());
(...skipping 98 matching lines...)
1278 // add SP, SpillAreaSizeBytes 1393 // add SP, SpillAreaSizeBytes
1279 if (SpillAreaSizeBytes) { 1394 if (SpillAreaSizeBytes) {
1280 // Use the scratch register if needed to legalize the immediate. 1395 // Use the scratch register if needed to legalize the immediate.
1281 Operand *AddAmount = 1396 Operand *AddAmount =
1282 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 1397 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1283 Legal_Reg | Legal_Flex, getReservedTmpReg()); 1398 Legal_Reg | Legal_Flex, getReservedTmpReg());
1284 Sandboxer(this).add_sp(AddAmount); 1399 Sandboxer(this).add_sp(AddAmount);
1285 } 1400 }
1286 } 1401 }
1287 1402
1288 // Add pop instructions for preserved registers. 1403 if (!PreservedGPRs.empty())
1289 llvm::SmallBitVector CalleeSaves = 1404 _pop(PreservedGPRs);
1290 getRegisterSet(RegSet_CalleeSave, RegSet_None); 1405 if (!PreservedSRegs.empty())
1291 VarList GPRsToRestore; 1406 _pop(PreservedSRegs);
1292 GPRsToRestore.reserve(CalleeSaves.size());
1293 // Consider FP and LR as callee-save / used as needed.
1294 if (UsesFramePointer) {
1295 CalleeSaves[RegARM32::Reg_fp] = true;
1296 }
1297 if (!MaybeLeafFunc) {
1298 CalleeSaves[RegARM32::Reg_lr] = true;
1299 }
1300 // Pop registers in ascending order just like push (instead of in reverse
1301 // order).
1302 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1303 if (RegARM32::isI64RegisterPair(i)) {
1304 continue;
1305 }
1306
1307 if (CalleeSaves[i] && RegsUsed[i]) {
1308 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1309 continue;
1310 }
1311 GPRsToRestore.push_back(getPhysicalRegister(i));
1312 }
1313 }
1314 if (!GPRsToRestore.empty())
1315 _pop(GPRsToRestore);
1316 1407
1317 if (!Ctx->getFlags().getUseSandboxing()) 1408 if (!Ctx->getFlags().getUseSandboxing())
1318 return; 1409 return;
1319 1410
1320 // Change the original ret instruction into a sandboxed return sequence. 1411 // Change the original ret instruction into a sandboxed return sequence.
1412 //
1321 // bundle_lock 1413 // bundle_lock
1322 // bic lr, #0xc000000f 1414 // bic lr, #0xc000000f
1323 // bx lr 1415 // bx lr
1324 // bundle_unlock 1416 // bundle_unlock
1417 //
1325 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to 1418 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
1326 // restrict to the lower 1GB as well. 1419 // restrict to the lower 1GB as well.
1327 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr); 1420 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
1328 Variable *RetValue = nullptr; 1421 Variable *RetValue = nullptr;
1329 if (RI->getSrcSize()) 1422 if (RI->getSrcSize())
1330 RetValue = llvm::cast<Variable>(RI->getSrc(0)); 1423 RetValue = llvm::cast<Variable>(RI->getSrc(0));
1331 1424
1332 Sandboxer(this).ret(LR, RetValue); 1425 Sandboxer(this).ret(LR, RetValue);
1333 1426
1334 RI->setDeleted(); 1427 RI->setDeleted();
(...skipping 1299 matching lines...)
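The 0xc000000f mask used by the sandboxed return clears the low four bits of lr, forcing a 16-byte bundle boundary (assuming getBundleAlignLog2Bytes() is 4 here), and the top two bits, which keeps the target in the low 1 GB mentioned in the comment. A quick standalone check of that bit arithmetic:

  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint32_t Mask = 0xC000000F; // bits cleared by "bic lr, lr, #0xc000000f"
    uint32_t LR = 0xD1234567;         // a deliberately bad return address

    uint32_t Sanitized = LR & ~Mask;
    printf("before: 0x%08X after: 0x%08X\n", LR, Sanitized);
    printf("16-byte aligned: %d, below 1GB: %d\n",
           (Sanitized & 0xF) == 0, Sanitized < (1u << 30));
    return 0;
  }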
2634 Operations)) { 2727 Operations)) {
2635 return false; 2728 return false;
2636 } 2729 }
2637 } 2730 }
2638 2731
2639 return Src == 0; 2732 return Src == 0;
2640 } 2733 }
2641 } // end of namespace StrengthReduction 2734 } // end of namespace StrengthReduction
2642 } // end of anonymous namespace 2735 } // end of anonymous namespace
2643 2736
2644 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 2737 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
2645 Variable *Dest = Inst->getDest(); 2738 Variable *Dest = Instr->getDest();
2646 2739
2647 if (Dest->isRematerializable()) { 2740 if (Dest->isRematerializable()) {
2648 Context.insert(InstFakeDef::create(Func, Dest)); 2741 Context.insert(InstFakeDef::create(Func, Dest));
2649 return; 2742 return;
2650 } 2743 }
2651 2744
2652 Type DestTy = Dest->getType(); 2745 Type DestTy = Dest->getType();
2653 if (DestTy == IceType_i1) { 2746 if (DestTy == IceType_i1) {
2654 lowerInt1Arithmetic(Inst); 2747 lowerInt1Arithmetic(Instr);
2655 return; 2748 return;
2656 } 2749 }
2657 2750
2658 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2751 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2659 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 2752 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2660 if (DestTy == IceType_i64) { 2753 if (DestTy == IceType_i64) {
2661 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); 2754 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
2662 return; 2755 return;
2663 } 2756 }
2664 2757
2665 if (isVectorType(DestTy)) { 2758 if (isVectorType(DestTy)) {
2666 // Add a fake def to keep liveness consistent in the meantime. 2759 // Add a fake def to keep liveness consistent in the meantime.
2667 Variable *T = makeReg(DestTy); 2760 Variable *T = makeReg(DestTy);
2668 Context.insert(InstFakeDef::create(Func, T)); 2761 Context.insert(InstFakeDef::create(Func, T));
2669 _mov(Dest, T); 2762 _mov(Dest, T);
2670 UnimplementedError(Func->getContext()->getFlags()); 2763 UnimplementedError(Func->getContext()->getFlags());
2671 return; 2764 return;
2672 } 2765 }
2673 2766
2674 // DestTy is a non-i64 scalar. 2767 // DestTy is a non-i64 scalar.
2675 Variable *T = makeReg(DestTy); 2768 Variable *T = makeReg(DestTy);
2676 2769
2677 // * Handle div/rem separately. They require a non-legalized Src1 to inspect 2770 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
2678 // whether or not Src1 is a non-zero constant. Once legalized it is more 2771 // whether or not Src1 is a non-zero constant. Once legalized it is more
2679 // difficult to determine (constant may be moved to a register). 2772 // difficult to determine (constant may be moved to a register).
2680 // * Handle floating point arithmetic separately: they require Src1 to be 2773 // * Handle floating point arithmetic separately: they require Src1 to be
2681 // legalized to a register. 2774 // legalized to a register.
2682 switch (Inst->getOp()) { 2775 switch (Instr->getOp()) {
2683 default: 2776 default:
2684 break; 2777 break;
2685 case InstArithmetic::Udiv: { 2778 case InstArithmetic::Udiv: {
2686 constexpr bool NotRemainder = false; 2779 constexpr bool NotRemainder = false;
2687 Variable *Src0R = legalizeToReg(Src0); 2780 Variable *Src0R = legalizeToReg(Src0);
2688 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 2781 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
2689 NotRemainder); 2782 NotRemainder);
2690 return; 2783 return;
2691 } 2784 }
2692 case InstArithmetic::Sdiv: { 2785 case InstArithmetic::Sdiv: {
(...skipping 18 matching lines...)
2711 return; 2804 return;
2712 } 2805 }
2713 case InstArithmetic::Frem: { 2806 case InstArithmetic::Frem: {
2714 if (!isScalarFloatingType(DestTy)) { 2807 if (!isScalarFloatingType(DestTy)) {
2715 llvm::report_fatal_error("Unexpected type when lowering frem."); 2808 llvm::report_fatal_error("Unexpected type when lowering frem.");
2716 } 2809 }
2717 llvm::report_fatal_error("Frem should have already been lowered."); 2810 llvm::report_fatal_error("Frem should have already been lowered.");
2718 } 2811 }
2719 case InstArithmetic::Fadd: { 2812 case InstArithmetic::Fadd: {
2720 Variable *Src0R = legalizeToReg(Src0); 2813 Variable *Src0R = legalizeToReg(Src0);
2814 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2815 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2816 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2817 _vmla(Src0R, Src1R, Src2R);
2818 _mov(Dest, Src0R);
2819 return;
2820 }
2821
2721 Variable *Src1R = legalizeToReg(Src1); 2822 Variable *Src1R = legalizeToReg(Src1);
2722 _vadd(T, Src0R, Src1R); 2823 _vadd(T, Src0R, Src1R);
2723 _mov(Dest, T); 2824 _mov(Dest, T);
2724 return; 2825 return;
2725 } 2826 }
2726 case InstArithmetic::Fsub: { 2827 case InstArithmetic::Fsub: {
2727 Variable *Src0R = legalizeToReg(Src0); 2828 Variable *Src0R = legalizeToReg(Src0);
2829 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2830 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2831 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2832 _vmls(Src0R, Src1R, Src2R);
2833 _mov(Dest, Src0R);
2834 return;
2835 }
2728 Variable *Src1R = legalizeToReg(Src1); 2836 Variable *Src1R = legalizeToReg(Src1);
2729 _vsub(T, Src0R, Src1R); 2837 _vsub(T, Src0R, Src1R);
2730 _mov(Dest, T); 2838 _mov(Dest, T);
2731 return; 2839 return;
2732 } 2840 }
2733 case InstArithmetic::Fmul: { 2841 case InstArithmetic::Fmul: {
2734 Variable *Src0R = legalizeToReg(Src0); 2842 Variable *Src0R = legalizeToReg(Src0);
2735 Variable *Src1R = legalizeToReg(Src1); 2843 Variable *Src1R = legalizeToReg(Src1);
2736 _vmul(T, Src0R, Src1R); 2844 _vmul(T, Src0R, Src1R);
2737 _mov(Dest, T); 2845 _mov(Dest, T);
2738 return; 2846 return;
2739 } 2847 }
2740 case InstArithmetic::Fdiv: { 2848 case InstArithmetic::Fdiv: {
2741 Variable *Src0R = legalizeToReg(Src0); 2849 Variable *Src0R = legalizeToReg(Src0);
2742 Variable *Src1R = legalizeToReg(Src1); 2850 Variable *Src1R = legalizeToReg(Src1);
2743 _vdiv(T, Src0R, Src1R); 2851 _vdiv(T, Src0R, Src1R);
2744 _mov(Dest, T); 2852 _mov(Dest, T);
2745 return; 2853 return;
2746 } 2854 }
2747 } 2855 }
2748 2856
2749 // Handle everything else here. 2857 // Handle everything else here.
2750 Int32Operands Srcs(Src0, Src1); 2858 Int32Operands Srcs(Src0, Src1);
2751 switch (Inst->getOp()) { 2859 switch (Instr->getOp()) {
2752 case InstArithmetic::_num: 2860 case InstArithmetic::_num:
2753 llvm::report_fatal_error("Unknown arithmetic operator"); 2861 llvm::report_fatal_error("Unknown arithmetic operator");
2754 return; 2862 return;
2755 case InstArithmetic::Add: { 2863 case InstArithmetic::Add: {
2864 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2865 Variable *Src0R = legalizeToReg(Src0);
2866 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2867 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2868 _mla(T, Src1R, Src2R, Src0R);
2869 _mov(Dest, T);
2870 return;
2871 }
2872
2756 if (Srcs.hasConstOperand()) { 2873 if (Srcs.hasConstOperand()) {
2757 if (!Srcs.immediateIsFlexEncodable() && 2874 if (!Srcs.immediateIsFlexEncodable() &&
2758 Srcs.negatedImmediateIsFlexEncodable()) { 2875 Srcs.negatedImmediateIsFlexEncodable()) {
2759 Variable *Src0R = Srcs.src0R(this); 2876 Variable *Src0R = Srcs.src0R(this);
2760 Operand *Src1F = Srcs.negatedSrc1F(this); 2877 Operand *Src1F = Srcs.negatedSrc1F(this);
2761 if (!Srcs.swappedOperands()) { 2878 if (!Srcs.swappedOperands()) {
2762 _sub(T, Src0R, Src1F); 2879 _sub(T, Src0R, Src1F);
2763 } else { 2880 } else {
2764 _rsb(T, Src0R, Src1F); 2881 _rsb(T, Src0R, Src1F);
2765 } 2882 }
(...skipping 32 matching lines...)
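The Fadd/Fsub cases fold a single-use fmul producer into vmla/vmls, which accumulate into their first operand; that is why Src0R doubles as the accumulator and is then moved into Dest. The integer Add case above (and the Sub case further below) does the same with mla/mls. In plain C++ terms the folded operations compute roughly the following (illustration only, not Subzero code):

  #include <cstdint>
  #include <cstdio>

  // vmla / vmls: multiply-accumulate into the first operand.
  static float mulAddF(float Acc, float A, float B) { return Acc + A * B; }
  static float mulSubF(float Acc, float A, float B) { return Acc - A * B; }
  // mla / mls: the 32-bit integer counterparts.
  static int32_t mulAddI(int32_t Acc, int32_t A, int32_t B) { return Acc + A * B; }
  static int32_t mulSubI(int32_t Acc, int32_t A, int32_t B) { return Acc - A * B; }

  int main() {
    // "t = fmul a, b; d = fadd c, t" becomes a single vmla: d = c + a * b.
    printf("%g %g %d %d\n", mulAddF(1.0f, 2.0f, 3.0f), mulSubF(1.0f, 2.0f, 3.0f),
           mulAddI(10, 2, 3), mulSubI(10, 2, 3)); // 7 -5 16 4
    return 0;
  }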
2798 return; 2915 return;
2799 } 2916 }
2800 case InstArithmetic::Xor: { 2917 case InstArithmetic::Xor: {
2801 Variable *Src0R = Srcs.src0R(this); 2918 Variable *Src0R = Srcs.src0R(this);
2802 Operand *Src1RF = Srcs.src1RF(this); 2919 Operand *Src1RF = Srcs.src1RF(this);
2803 _eor(T, Src0R, Src1RF); 2920 _eor(T, Src0R, Src1RF);
2804 _mov(Dest, T); 2921 _mov(Dest, T);
2805 return; 2922 return;
2806 } 2923 }
2807 case InstArithmetic::Sub: { 2924 case InstArithmetic::Sub: {
2925 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2926 Variable *Src0R = legalizeToReg(Src0);
2927 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2928 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2929 _mls(T, Src1R, Src2R, Src0R);
2930 _mov(Dest, T);
2931 return;
2932 }
2933
2808 if (Srcs.hasConstOperand()) { 2934 if (Srcs.hasConstOperand()) {
2809 if (Srcs.immediateIsFlexEncodable()) { 2935 if (Srcs.immediateIsFlexEncodable()) {
2810 Variable *Src0R = Srcs.src0R(this); 2936 Variable *Src0R = Srcs.src0R(this);
2811 Operand *Src1RF = Srcs.src1RF(this); 2937 Operand *Src1RF = Srcs.src1RF(this);
2812 if (Srcs.swappedOperands()) { 2938 if (Srcs.swappedOperands()) {
2813 _rsb(T, Src0R, Src1RF); 2939 _rsb(T, Src0R, Src1RF);
2814 } else { 2940 } else {
2815 _sub(T, Src0R, Src1RF); 2941 _sub(T, Src0R, Src1RF);
2816 } 2942 }
2817 _mov(Dest, T); 2943 _mov(Dest, T);
(...skipping 188 matching lines...)
3006 } 3132 }
3007 _mov(Dest, NewSrc); 3133 _mov(Dest, NewSrc);
3008 } 3134 }
3009 3135
3010 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 3136 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3011 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 3137 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3012 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 3138 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3013 InstARM32Label *NewShortCircuitLabel = nullptr; 3139 InstARM32Label *NewShortCircuitLabel = nullptr;
3014 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 3140 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3015 3141
3016 const Inst *Producer = BoolComputations.getProducerOf(Boolean); 3142 const Inst *Producer = Computations.getProducerOf(Boolean);
3017 3143
3018 if (Producer == nullptr) { 3144 if (Producer == nullptr) {
3019 // No producer, no problem: just emit code to perform (Boolean & 1) and 3145
3020 // set the flags register. The branch should be taken if the resulting flags 3146 // set the flags register. The branch should be taken if the resulting flags
3021 // indicate a non-zero result. 3147 // indicate a non-zero result.
3022 _tst(legalizeToReg(Boolean), _1); 3148 _tst(legalizeToReg(Boolean), _1);
3023 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE)); 3149 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3024 } 3150 }
3025 3151
3026 switch (Producer->getKind()) { 3152 switch (Producer->getKind()) {
(...skipping 200 matching lines...)
3227 Variable *ReturnReg = nullptr; 3353 Variable *ReturnReg = nullptr;
3228 Variable *ReturnRegHi = nullptr; 3354 Variable *ReturnRegHi = nullptr;
3229 if (Dest) { 3355 if (Dest) {
3230 switch (Dest->getType()) { 3356 switch (Dest->getType()) {
3231 case IceType_NUM: 3357 case IceType_NUM:
3232 llvm::report_fatal_error("Invalid Call dest type"); 3358 llvm::report_fatal_error("Invalid Call dest type");
3233 break; 3359 break;
3234 case IceType_void: 3360 case IceType_void:
3235 break; 3361 break;
3236 case IceType_i1: 3362 case IceType_i1:
3237 assert(BoolComputations.getProducerOf(Dest) == nullptr); 3363 assert(Computations.getProducerOf(Dest) == nullptr);
3238 // Fall-through intended. 3364 // Fall-through intended.
3239 case IceType_i8: 3365 case IceType_i8:
3240 case IceType_i16: 3366 case IceType_i16:
3241 case IceType_i32: 3367 case IceType_i32:
3242 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); 3368 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3243 break; 3369 break;
3244 case IceType_i64: 3370 case IceType_i64:
3245 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); 3371 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3246 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); 3372 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3247 break; 3373 break;
(...skipping 2157 matching lines...)
5405 } 5531 }
5406 5532
5407 if (llvm::isa<Constant>(From)) { 5533 if (llvm::isa<Constant>(From)) {
5408 if (llvm::isa<ConstantUndef>(From)) { 5534 if (llvm::isa<ConstantUndef>(From)) {
5409 From = legalizeUndef(From, RegNum); 5535 From = legalizeUndef(From, RegNum);
5410 if (isVectorType(Ty)) 5536 if (isVectorType(Ty))
5411 return From; 5537 return From;
5412 } 5538 }
5413 // There should be no constants of vector type (other than undef). 5539 // There should be no constants of vector type (other than undef).
5414 assert(!isVectorType(Ty)); 5540 assert(!isVectorType(Ty));
5415 bool CanBeFlex = Allowed & Legal_Flex;
5416 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { 5541 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
5417 uint32_t RotateAmt; 5542 uint32_t RotateAmt;
5418 uint32_t Immed_8; 5543 uint32_t Immed_8;
5419 uint32_t Value = static_cast<uint32_t>(C32->getValue()); 5544 uint32_t Value = static_cast<uint32_t>(C32->getValue());
5420 // Check if the immediate will fit in a Flexible second operand, if a 5545 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
5421 // Flexible second operand is allowed. We need to know the exact value, 5546 // The immediate can be encoded as a Flex immediate. We may return the
5422 // so that rules out relocatable constants. Also try the inverse and use 5547 // Flex operand if the caller has Allow'ed it.
5423 // MVN if possible. 5548 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
5424 if (CanBeFlex && 5549 const bool CanBeFlex = Allowed & Legal_Flex;
5425 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { 5550 if (CanBeFlex)
5426 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 5551 return OpF;
5427 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( 5552 return copyToReg(OpF, RegNum);
5428 ~Value, &RotateAmt, &Immed_8)) { 5553 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
Jim Stichnoth 2015/12/07 20:58:14 No "else if" after unconditional return.
John 2015/12/08 13:54:24 In my defense, this was here prior to this CL. :P
5429 auto InvertedFlex = 5554 &Immed_8)) {
5555 // Even though the immediate can't be encoded as a Flex operand, its
5556 // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
5557 // constant with a single instruction.
5558 auto *InvOpF =
5430 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 5559 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
5431 Variable *Reg = makeReg(Ty, RegNum); 5560 Variable *Reg = makeReg(Ty, RegNum);
5432 _mvn(Reg, InvertedFlex); 5561 _mvn(Reg, InvOpF);
5433 return Reg; 5562 return Reg;
5434 } else { 5563 } else {
5435 // Do a movw/movt to a register. 5564 // Do a movw/movt to a register.
5436 Variable *Reg = makeReg(Ty, RegNum); 5565 Variable *Reg = makeReg(Ty, RegNum);
5437 uint32_t UpperBits = (Value >> 16) & 0xFFFF; 5566 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
5438 _movw(Reg, 5567 _movw(Reg,
5439 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); 5568 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
5440 if (UpperBits != 0) { 5569 if (UpperBits != 0) {
5441 _movt(Reg, Ctx->getConstantInt32(UpperBits)); 5570 _movt(Reg, Ctx->getConstantInt32(UpperBits));
5442 } 5571 }
(...skipping 36 matching lines...)
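The constant path above relies on ARM's flexible immediate encoding: an 8-bit value rotated right by an even amount. If the value fits, it becomes a Flex operand; if only its bitwise inverse fits, a single mvn loads it; otherwise a movw/movt pair is emitted. A simplified standalone version of that check (it mirrors the idea of OperandARM32FlexImm::canHoldImm, not its exact implementation):

  #include <cstdint>
  #include <cstdio>

  // True if Value == imm8 ROR (2*n) for some imm8 < 256, i.e. rotating Value
  // left by some even amount yields a value that fits in 8 bits.
  static bool isFlexEncodable(uint32_t Value) {
    for (uint32_t R = 0; R < 32; R += 2) {
      uint32_t Rotated = (R == 0) ? Value : ((Value << R) | (Value >> (32 - R)));
      if ((Rotated & ~0xFFu) == 0)
        return true;
    }
    return false;
  }

  int main() {
    const uint32_t Samples[] = {0x000000FF, 0xFF000000, 0xFFFFFF00, 0x12345678};
    for (uint32_t V : Samples) {
      if (isFlexEncodable(V))
        printf("0x%08X: flex immediate\n", V);
      else if (isFlexEncodable(~V))
        printf("0x%08X: mvn of a flex immediate\n", V);
      else
        printf("0x%08X: needs movw/movt\n", V);
    }
    return 0;
  }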
5479 return copyToReg(From, RegNum); 5608 return copyToReg(From, RegNum);
5480 } 5609 }
5481 } 5610 }
5482 5611
5483 if (auto *Var = llvm::dyn_cast<Variable>(From)) { 5612 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
5484 if (Var->isRematerializable()) { 5613 if (Var->isRematerializable()) {
5485 if (Allowed & Legal_Rematerializable) { 5614 if (Allowed & Legal_Rematerializable) {
5486 return From; 5615 return From;
5487 } 5616 }
5488 5617
5489 // TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
5490 // for a Variable in a Mem operand.
5491 Variable *T = makeReg(Var->getType(), RegNum); 5618 Variable *T = makeReg(Var->getType(), RegNum);
5492 _mov(T, Var); 5619 _mov(T, Var);
5493 return T; 5620 return T;
5494 } 5621 }
5495 // Check if the variable is guaranteed a physical register. This can happen 5622 // Check if the variable is guaranteed a physical register. This can happen
5496 // either when the variable is pre-colored or when it is assigned infinite 5623 // either when the variable is pre-colored or when it is assigned infinite
5497 // weight. 5624 // weight.
5498 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 5625 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
5499 // We need a new physical register for the operand if: 5626 // We need a new physical register for the operand if:
5500 // Mem is not allowed and Var isn't guaranteed a physical 5627 // Mem is not allowed and Var isn't guaranteed a physical
(...skipping 180 matching lines...)
5681 5808
5682 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex)); 5809 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
5683 if (DestHi) { 5810 if (DestHi) {
5684 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex)); 5811 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
5685 } 5812 }
5686 5813
5687 CondWhenTrue Cond(CondARM32::kNone); 5814 CondWhenTrue Cond(CondARM32::kNone);
5688 // FlagsWereSet is used to determine whether Boolean was folded or not. If not, 5815
5689 // add an explicit _tst instruction below. 5816 // add an explicit _tst instruction below.
5690 bool FlagsWereSet = false; 5817 bool FlagsWereSet = false;
5691 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { 5818 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
5692 switch (Producer->getKind()) { 5819 switch (Producer->getKind()) {
5693 default: 5820 default:
5694 llvm::report_fatal_error("Unexpected producer."); 5821 llvm::report_fatal_error("Unexpected producer.");
5695 case Inst::Icmp: { 5822 case Inst::Icmp: {
5696 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); 5823 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
5697 FlagsWereSet = true; 5824 FlagsWereSet = true;
5698 } break; 5825 } break;
5699 case Inst::Fcmp: { 5826 case Inst::Fcmp: {
5700 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer)); 5827 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
5701 FlagsWereSet = true; 5828 FlagsWereSet = true;
(...skipping 63 matching lines...)
5765 5892
5766 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest, 5893 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
5767 Operand *Boolean) { 5894 Operand *Boolean) {
5768 assert(Boolean->getType() == IceType_i1); 5895 assert(Boolean->getType() == IceType_i1);
5769 Variable *T = makeReg(IceType_i1); 5896 Variable *T = makeReg(IceType_i1);
5770 Operand *_0 = 5897 Operand *_0 =
5771 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex); 5898 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
5772 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 5899 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
5773 5900
5774 SafeBoolChain Safe = SBC_Yes; 5901 SafeBoolChain Safe = SBC_Yes;
5775 if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { 5902 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
5776 switch (Producer->getKind()) { 5903 switch (Producer->getKind()) {
5777 default: 5904 default:
5778 llvm::report_fatal_error("Unexpected producer."); 5905 llvm::report_fatal_error("Unexpected producer.");
5779 case Inst::Icmp: { 5906 case Inst::Icmp: {
5780 _mov(T, _0); 5907 _mov(T, _0);
5781 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer)); 5908 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
5782 assert(Cond.WhenTrue0 != CondARM32::AL); 5909 assert(Cond.WhenTrue0 != CondARM32::AL);
5783 assert(Cond.WhenTrue0 != CondARM32::kNone); 5910 assert(Cond.WhenTrue0 != CondARM32::kNone);
5784 assert(Cond.WhenTrue1 == CondARM32::kNone); 5911 assert(Cond.WhenTrue1 == CondARM32::kNone);
5785 _mov_redefined(T, _1, Cond.WhenTrue0); 5912 _mov_redefined(T, _1, Cond.WhenTrue0);
(...skipping 91 matching lines...)
5877 return false; 6004 return false;
5878 case InstArithmetic::And: 6005 case InstArithmetic::And:
5879 return !isVectorType(Instr.getDest()->getType()); 6006 return !isVectorType(Instr.getDest()->getType());
5880 case InstArithmetic::Or: 6007 case InstArithmetic::Or:
5881 return !isVectorType(Instr.getDest()->getType()); 6008 return !isVectorType(Instr.getDest()->getType());
5882 } 6009 }
5883 } 6010 }
5884 } 6011 }
5885 } 6012 }
5886 } // end of namespace BoolFolding 6013 } // end of namespace BoolFolding
6014
6015 namespace FpFolding {
6016 bool shouldTrackProducer(const Inst &Instr) {
6017 switch (Instr.getKind()) {
6018 default:
6019 return false;
6020 case Inst::Arithmetic: {
6021 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6022 default:
6023 return false;
6024 case InstArithmetic::Fmul:
6025 return true;
6026 }
6027 }
6028 }
6029 }
6030
6031 bool isValidConsumer(const Inst &Instr) {
6032 switch (Instr.getKind()) {
6033 default:
6034 return false;
6035 case Inst::Arithmetic: {
6036 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6037 default:
6038 return false;
6039 case InstArithmetic::Fadd:
6040 case InstArithmetic::Fsub:
6041 return true;
6042 }
6043 }
6044 }
6045 }
6046 } // end of namespace FpFolding
6047
6048 namespace IntFolding {
6049 bool shouldTrackProducer(const Inst &Instr) {
6050 switch (Instr.getKind()) {
6051 default:
6052 return false;
6053 case Inst::Arithmetic: {
6054 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6055 default:
6056 return false;
6057 case InstArithmetic::Mul:
6058 return true;
6059 }
6060 }
6061 }
6062 }
6063
6064 bool isValidConsumer(const Inst &Instr) {
6065 switch (Instr.getKind()) {
6066 default:
6067 return false;
6068 case Inst::Arithmetic: {
6069 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6070 default:
6071 return false;
6072 case InstArithmetic::Add:
6073 case InstArithmetic::Sub:
6074 return true;
6075 }
6076 }
6077 }
6078 }
6079 } // end of namespace IntFolding
5887 } // end of anonymous namespace 6080 } // end of anonymous namespace
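These folding whitelists feed the ComputationTracker below: an instruction is remembered as a producer only if it is on a producer whitelist, and it stays foldable only while every use it encounters is an acceptable consumer; the tracker also records use counts and liveness so only safely foldable values survive. A toy model of that bookkeeping, with an invented mini-IR rather than Subzero's Inst classes:

  #include <cstdio>
  #include <map>
  #include <string>
  #include <vector>

  // Invented mini-IR, for illustration only.
  struct ToyInst {
    std::string Op;                // "fmul", "fadd", "ret", ...
    std::string Dest;              // empty if the instruction produces nothing
    std::vector<std::string> Srcs;
  };

  int main() {
    // t = fmul a, b ; d = fadd c, t  -- "t" is a candidate for vmla folding.
    const std::vector<ToyInst> Block = {{"fmul", "t", {"a", "b"}},
                                        {"fadd", "d", {"c", "t"}},
                                        {"ret", "", {"d"}}};

    struct Entry {
      int NumUses = 0;
    };
    std::map<std::string, Entry> Known;

    for (const ToyInst &I : Block) {
      if (!I.Dest.empty() && I.Op == "fmul") // producer whitelist
        Known[I.Dest] = Entry();
      for (const std::string &Src : I.Srcs) {
        auto It = Known.find(Src);
        if (It == Known.end())
          continue;
        ++It->second.NumUses;
        if (I.Op != "fadd" && I.Op != "fsub") { // consumer whitelist
          Known.erase(It); // a non-foldable use kills the candidate
          continue;
        }
      }
    }

    // "t" survives with exactly one foldable use, so the fadd may emit vmla.
    for (const auto &KV : Known)
      printf("%s: %d foldable use(s)\n", KV.first.c_str(), KV.second.NumUses);
    return 0;
  }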
5888 6081
5889 void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { 6082 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
5890 for (Inst &Instr : Node->getInsts()) { 6083 for (Inst &Instr : Node->getInsts()) {
5891 // Check whether Instr is a valid producer. 6084 // Check whether Instr is a valid producer.
5892 Variable *Dest = Instr.getDest(); 6085 Variable *Dest = Instr.getDest();
5893 if (!Instr.isDeleted() // only consider non-deleted instructions; and 6086 if (!Instr.isDeleted() // only consider non-deleted instructions; and
5894 && Dest // only instructions with an actual dest var; and 6087 && Dest // only instructions with an actual dest var; and
5895 && Dest->getType() == IceType_i1 // only bool-type dest vars; and 6088 && Dest->getType() == IceType_i1 // only bool-type dest vars; and
5896 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. 6089 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
5897 KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr)); 6090 KnownComputations.emplace(Dest->getIndex(),
6091 ComputationEntry(&Instr, IceType_i1));
6092 }
6093 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6094 && Dest // only instructions with an actual dest var; and
6095 && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
6096 && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6097 KnownComputations.emplace(Dest->getIndex(),
6098 ComputationEntry(&Instr, Dest->getType()));
6099 }
6100 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6101 && Dest // only instructions with an actual dest var; and
6102 && Dest->getType() == IceType_i32 // i32 only dest vars; and
6103 && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6104 KnownComputations.emplace(Dest->getIndex(),
6105 ComputationEntry(&Instr, IceType_i32));
5898 } 6106 }
5899 // Check each src variable against the map. 6107 // Check each src variable against the map.
5900 FOREACH_VAR_IN_INST(Var, Instr) { 6108 FOREACH_VAR_IN_INST(Var, Instr) {
5901 SizeT VarNum = Var->getIndex(); 6109 SizeT VarNum = Var->getIndex();
5902 auto ComputationIter = KnownComputations.find(VarNum); 6110 auto ComputationIter = KnownComputations.find(VarNum);
5903 if (ComputationIter == KnownComputations.end()) { 6111 if (ComputationIter == KnownComputations.end()) {
5904 continue; 6112 continue;
5905 } 6113 }
5906 6114
5907 ++ComputationIter->second.NumUses; 6115 ++ComputationIter->second.NumUses;
5908 if (!BoolFolding::isValidConsumer(Instr)) { 6116 switch (ComputationIter->second.ComputationType) {
6117 default:
5909 KnownComputations.erase(VarNum); 6118 KnownComputations.erase(VarNum);
5910 continue; 6119 continue;
6120 case IceType_i1:
6121 if (!BoolFolding::isValidConsumer(Instr)) {
6122 KnownComputations.erase(VarNum);
6123 continue;
6124 }
6125 break;
6126 case IceType_i32:
6127 if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
6128 KnownComputations.erase(VarNum);
6129 continue;
6130 }
6131 break;
6132 case IceType_f32:
6133 case IceType_f64:
6134 if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
6135 KnownComputations.erase(VarNum);
6136 continue;
6137 }
6138 break;
5911 } 6139 }
5912 6140
5913 if (Instr.isLastUse(Var)) { 6141 if (Instr.isLastUse(Var)) {
5914 ComputationIter->second.IsLiveOut = false; 6142 ComputationIter->second.IsLiveOut = false;
5915 } 6143 }
5916 } 6144 }
5917 } 6145 }
5918 6146
5919 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); 6147 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
5920 Iter != End;) { 6148 Iter != End;) {
(...skipping 325 matching lines...)
6246 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 6474 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
6247 // However, for compatibility with current NaCl LLVM, don't claim that. 6475 // However, for compatibility with current NaCl LLVM, don't claim that.
6248 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 6476 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
6249 } 6477 }
6250 6478
6251 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 6479 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
6252 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 6480 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
6253 llvm::SmallBitVector TargetARM32::ScratchRegs; 6481 llvm::SmallBitVector TargetARM32::ScratchRegs;
6254 6482
6255 } // end of namespace Ice 6483 } // end of namespace Ice