src/IceTargetLoweringX8632.cpp - Issue 397833002: Lower the rest of the vector arithmetic operations.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 397833002: Lower the rest of the vector arithmetic operations. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Format crosstest.py Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

(...skipping 70 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
81	81

82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {	82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {

83 size_t Index = static_cast<size_t>(Cond);	83 size_t Index = static_cast<size_t>(Cond);

84 assert(Index < TableIcmp32Size);	84 assert(Index < TableIcmp32Size);

85 return TableIcmp32[Index].Mapping;	85 return TableIcmp32[Index].Mapping;

86 }	86 }

87	87

88 // The maximum number of arguments to pass in XMM registers	88 // The maximum number of arguments to pass in XMM registers

89 const unsigned X86_MAX_XMM_ARGS = 4;	89 const unsigned X86_MAX_XMM_ARGS = 4;

90	90

	91 // Return a string representation of the type that is suitable for use

	92 // in an identifier.

	93 IceString typeIdentString(const Type Ty) {
	Jim Stichnoth 2014/07/16 19:17:10: Put inside an anonymous namespace. wala 2014/07/17 01:34:53: This is already inside an anonymous namespace. Jim Stichnoth 2014/07/17 13:03:13: D'oh! Sorry!
	94 IceString Str;

	95 llvm::raw_string_ostream BaseOS(Str);

	96 Ostream OS(&BaseOS);

	97 if (isVectorType(Ty)) {

	98 OS << "v" << typeNumElements(Ty) << typeElementType(Ty);

	99 } else {

	100 OS << Ty;

	101 }

	102 return BaseOS.str();

	103 }

	104

91 // In some cases, there are x-macros tables for both high-level and	105 // In some cases, there are x-macros tables for both high-level and

92 // low-level instructions/operands that use the same enum key value.	106 // low-level instructions/operands that use the same enum key value.

93 // The tables are kept separate to maintain a proper separation	107 // The tables are kept separate to maintain a proper separation

94 // between abstraction layers. There is a risk that the tables	108 // between abstraction layers. There is a risk that the tables

95 // could get out of sync if enum values are reordered or if entries	109 // could get out of sync if enum values are reordered or if entries

96 // are added or deleted. This dummy function uses static_assert to	110 // are added or deleted. This dummy function uses static_assert to

97 // ensure everything is kept in sync.	111 // ensure everything is kept in sync.

98 void xMacroIntegrityCheck() {	112 void xMacroIntegrityCheck() {

99 // Validate the enum values in FCMPX8632_TABLE.	113 // Validate the enum values in FCMPX8632_TABLE.

100 {	114 {

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
150 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);	164 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

151 ICEINSTICMP_TABLE;	165 ICEINSTICMP_TABLE;

152 #undef X	166 #undef X

153 }	167 }

154	168

155 // Validate the enum values in ICETYPEX8632_TABLE.	169 // Validate the enum values in ICETYPEX8632_TABLE.

156 {	170 {

157 // Define a temporary set of enum values based on low-level	171 // Define a temporary set of enum values based on low-level

158 // table entries.	172 // table entries.

159 enum _tmp_enum {	173 enum _tmp_enum {

160 #define X(tag, cvt, sdss, width) _tmp_##tag,	174 #define X(tag, cvt, sdss, pack, width) _tmp_##tag,

161 ICETYPEX8632_TABLE	175 ICETYPEX8632_TABLE

162 #undef X	176 #undef X

163 _num	177 _num

164 };	178 };

165 // Define a set of constants based on high-level table entries.	179 // Define a set of constants based on high-level table entries.

166 #define X(tag, size, align, elts, elty, str) \	180 #define X(tag, size, align, elts, elty, str) \

167 static const int _table1_##tag = tag;	181 static const int _table1_##tag = tag;

168 ICETYPE_TABLE;	182 ICETYPE_TABLE;

169 #undef X	183 #undef X

170 // Define a set of constants based on low-level table entries,	184 // Define a set of constants based on low-level table entries,

171 // and ensure the table entry keys are consistent.	185 // and ensure the table entry keys are consistent.

172 #define X(tag, cvt, sdss, width) \	186 #define X(tag, cvt, sdss, pack, width) \

173 static const int _table2_##tag = _tmp_##tag; \	187 static const int _table2_##tag = _tmp_##tag; \

174 STATIC_ASSERT(_table1_##tag == _table2_##tag);	188 STATIC_ASSERT(_table1_##tag == _table2_##tag);

175 ICETYPEX8632_TABLE;	189 ICETYPEX8632_TABLE;

176 #undef X	190 #undef X

177 // Repeat the static asserts with respect to the high-level	191 // Repeat the static asserts with respect to the high-level

178 // table entries in case the high-level table has extra entries.	192 // table entries in case the high-level table has extra entries.

179 #define X(tag, size, align, elts, elty, str) \	193 #define X(tag, size, align, elts, elty, str) \

180 STATIC_ASSERT(_table1_##tag == _table2_##tag);	194 STATIC_ASSERT(_table1_##tag == _table2_##tag);

181 ICETYPE_TABLE;	195 ICETYPE_TABLE;

182 #undef X	196 #undef X

(...skipping 947 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1130 } break;	1144 } break;

1131 case InstArithmetic::Fadd:	1145 case InstArithmetic::Fadd:

1132 case InstArithmetic::Fsub:	1146 case InstArithmetic::Fsub:

1133 case InstArithmetic::Fmul:	1147 case InstArithmetic::Fmul:

1134 case InstArithmetic::Fdiv:	1148 case InstArithmetic::Fdiv:

1135 case InstArithmetic::Frem:	1149 case InstArithmetic::Frem:

1136 llvm_unreachable("FP instruction with i64 type");	1150 llvm_unreachable("FP instruction with i64 type");

1137 break;	1151 break;

1138 }	1152 }

1139 } else if (isVectorType(Dest->getType())) {	1153 } else if (isVectorType(Dest->getType())) {

	1154 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in

	1155 // registers. This is a workaround of the fact that there is no

	1156 // support for aligning stack operands. Once alignment support is

	1157 // implemented, replace legalizeToVar(Src1) with Src1.
	Jim Stichnoth 2014/07/16 19:17:10: I'm not adamant about this, but I think it might be easier for the person who eventually fixes this TODO if you did something like: #define LEGAL_HACK(s) legalizeToVar((s)) ... LEGAL_HACK(Src1) ... and then advise removing LEGAL_HACK(). wala 2014/07/17 01:34:54: Good idea. Done.
	1158 //

	1159 // TODO: Trap on divide and modulo by zero.
	Jim Stichnoth 2014/07/16 19:17:10: I believe this should only trap for integer div/mod, not floating point. Hopefully the issue tracker will clarify... wala 2014/07/17 01:34:53: Done.
	1160 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899

1140 switch (Inst->getOp()) {	1161 switch (Inst->getOp()) {

1141 case InstArithmetic::_num:	1162 case InstArithmetic::_num:

1142 llvm_unreachable("Unknown arithmetic operator");	1163 llvm_unreachable("Unknown arithmetic operator");

1143 break;	1164 break;

1144 case InstArithmetic::Add:	1165 case InstArithmetic::Add: {

1145 case InstArithmetic::And:	1166 Variable *T = makeReg(Dest->getType());

1146 case InstArithmetic::Or:	1167 _movp(T, Src0);

1147 case InstArithmetic::Xor:	1168 _padd(T, legalizeToVar(Src1));

1148 case InstArithmetic::Sub:	1169 _movp(Dest, T);

1149 case InstArithmetic::Mul:	1170 } break;

1150 case InstArithmetic::Shl:	1171 case InstArithmetic::And: {

1151 case InstArithmetic::Lshr:	1172 Variable *T = makeReg(Dest->getType());

1152 case InstArithmetic::Ashr:	1173 _movp(T, Src0);

1153 case InstArithmetic::Udiv:	1174 _pand(T, legalizeToVar(Src1));

1154 case InstArithmetic::Sdiv:	1175 _movp(Dest, T);

1155 case InstArithmetic::Urem:	1176 } break;

1156 case InstArithmetic::Srem:	1177 case InstArithmetic::Or: {

1157 // TODO(wala): Handle these.	1178 Variable *T = makeReg(Dest->getType());

1158 Func->setError("Unhandled instruction");	1179 _movp(T, Src0);

1159 break;	1180 _por(T, legalizeToVar(Src1));

	1181 _movp(Dest, T);

	1182 } break;

	1183 case InstArithmetic::Xor: {

	1184 Variable *T = makeReg(Dest->getType());

	1185 _movp(T, Src0);

	1186 _pxor(T, legalizeToVar(Src1));

	1187 _movp(Dest, T);

	1188 } break;

	1189 case InstArithmetic::Sub: {

	1190 Variable *T = makeReg(Dest->getType());

	1191 _movp(T, Src0);

	1192 _psub(T, legalizeToVar(Src1));

	1193 _movp(Dest, T);

	1194 } break;

	1195 case InstArithmetic::Mul: {

	1196 if (Dest->getType() == IceType_v4i32) {

	1197 // Lowering sequence:

	1198 // movups T1, Src0

	1199 // pshufd T2, Src0, [1,0,3,0]
	jvoung (off chromium) 2014/07/16 19:23:26: nit: The ordering of the vector ([1, 0, 3, 0]) is a bit unclear at first glance. I'm assuming you have index 0 on the left and index 3 on the right. It might be more clear if it was the other way around, to match the order of bits of the shuffle mask? wala 2014/07/17 01:34:53: I write vectors as if they were arrays in the comments. The representation of arrays starts with index 0 on the left and I don't want to switch orderings too much in the pseudocode. Furthermore, the Src[...] syntax that I use in the comments below this line also starts with index 0 on the left. These are the changes that I made: I changed the [ and ] to { and } to emphasize that I'm treating the values as if they were arrays and starting with index 0 on the left. I also added a comment that index 0 starts on the left. If that's not alright, I can change it to go from right to left. jvoung (off chromium) 2014/07/17 15:00:38: Okay, that helps some.
	1200 // pshufd T3, Src1, [1,0,3,0]

	1201 // # T1 = { Src0[0] * Src1[0], Src0[2] * Src1[2] }

	1202 // pmuludq T1, Src1

	1203 // # T2 = { Src0[1] * Src1[1], Src0[3] * Src1[3] }

	1204 // pmuludq T2, T3

	1205 // # T1 = { lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2]) }

	1206 // shufps T1, T2, [0,2,0,2]

	1207 // pshufd T4, T1, [0,2,1,3]

	1208 // movups Dest, T4

	1209 //

	1210 // TODO(wala): SSE4.1 has pmulld.

	1211

	1212 // Mask that directs pshufd to create a vector with entries

	1213 // Src[1, 0, 3, 0]

	1214 const unsigned Constant1030 = 0x31;

	1215 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);

	1216 // Mask that directs shufps to create a vector with entries

	1217 // Dest[0, 2], Src[0, 2]

	1218 const unsigned Mask0202 = 0x88;

	1219 // Mask that directs pshufd to create a vector with entries

	1220 // Src[0, 2, 1, 3]

	1221 const unsigned Mask0213 = 0xd8;

	1222 Variable *T1 = makeReg(IceType_v4i32);

	1223 Variable *T2 = makeReg(IceType_v4i32);

	1224 Variable *T3 = makeReg(IceType_v4i32);

	1225 Variable *T4 = makeReg(IceType_v4i32);

	1226 _movp(T1, Src0);

	1227 // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R

	1228 // with Src1 after stack operand alignment support is

	1229 // implemented.

	1230 Variable *Src0R = legalizeToVar(Src0);

	1231 Variable *Src1R = legalizeToVar(Src1);

	1232 _pshufd(T2, Src0R, Mask1030);

	1233 _pshufd(T3, Src1R, Mask1030);

	1234 _pmuludq(T1, Src1R);

	1235 _pmuludq(T2, T3);

	1236 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));

	1237 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));

	1238 _movp(Dest, T4);

	1239 } else if (Dest->getType() == IceType_v8i16) {

	1240 Variable *T = makeReg(IceType_v8i16);

	1241 _movp(T, Src0);

	1242 _pmullw(T, legalizeToVar(Src1));

	1243 _movp(Dest, T);

	1244 } else {

	1245 assert(Dest->getType() == IceType_v16i8);

	1246 // Sz_mul_v16i8

	1247 const IceString Helper = "Sz_mul_v16i8";

	1248 const SizeT MaxSrcs = 2;

	1249 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1250 Call->addArg(Src0);

	1251 Call->addArg(Src1);

	1252 lowerCall(Call);

	1253 }

	1254 } break;

	1255 case InstArithmetic::Shl: {

	1256 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8

	1257 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());

	1258 const SizeT MaxSrcs = 2;

	1259 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1260 Call->addArg(Src0);

	1261 Call->addArg(Src1);

	1262 lowerCall(Call);

	1263 } break;

	1264 case InstArithmetic::Lshr: {

	1265 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8

	1266 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());

	1267 const SizeT MaxSrcs = 2;

	1268 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1269 Call->addArg(Src0);

	1270 Call->addArg(Src1);

	1271 lowerCall(Call);

	1272 } break;

	1273 case InstArithmetic::Ashr: {

	1274 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8

	1275 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());

	1276 const SizeT MaxSrcs = 2;

	1277 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1278 Call->addArg(Src0);

	1279 Call->addArg(Src1);

	1280 lowerCall(Call);

	1281 } break;

	1282 case InstArithmetic::Udiv: {

	1283 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8

	1284 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());

	1285 const SizeT MaxSrcs = 2;

	1286 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1287 Call->addArg(Src0);

	1288 Call->addArg(Src1);

	1289 lowerCall(Call);

	1290 } break;

	1291 case InstArithmetic::Sdiv: {

	1292 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8

	1293 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());

	1294 const SizeT MaxSrcs = 2;

	1295 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1296 Call->addArg(Src0);

	1297 Call->addArg(Src1);

	1298 lowerCall(Call);

	1299 } break;

	1300 case InstArithmetic::Urem: {

	1301 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8

	1302 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());

	1303 const SizeT MaxSrcs = 2;

	1304 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1305 Call->addArg(Src0);

	1306 Call->addArg(Src1);

	1307 lowerCall(Call);

	1308 } break;

	1309 case InstArithmetic::Srem: {

	1310 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8

	1311 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());

	1312 const SizeT MaxSrcs = 2;

	1313 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1314 Call->addArg(Src0);

	1315 Call->addArg(Src1);

	1316 lowerCall(Call);

	1317 } break;

1160 case InstArithmetic::Fadd: {	1318 case InstArithmetic::Fadd: {

1161 Variable *T = makeReg(Dest->getType());	1319 Variable *T = makeReg(Dest->getType());

1162 _movp(T, Src0);	1320 _movp(T, Src0);

1163 _addps(T, Src1);	1321 _addps(T, legalizeToVar(Src1));

1164 _movp(Dest, T);	1322 _movp(Dest, T);

1165 } break;	1323 } break;

1166 case InstArithmetic::Fsub: {	1324 case InstArithmetic::Fsub: {

1167 Variable *T = makeReg(Dest->getType());	1325 Variable *T = makeReg(Dest->getType());

1168 _movp(T, Src0);	1326 _movp(T, Src0);

1169 _subps(T, Src1);	1327 _subps(T, legalizeToVar(Src1));

1170 _movp(Dest, T);	1328 _movp(Dest, T);

1171 } break;	1329 } break;

1172 case InstArithmetic::Fmul: {	1330 case InstArithmetic::Fmul: {

1173 Variable *T = makeReg(Dest->getType());	1331 Variable *T = makeReg(Dest->getType());

1174 _movp(T, Src0);	1332 _movp(T, Src0);

1175 _mulps(T, Src1);	1333 _mulps(T, legalizeToVar(Src1));

1176 _movp(Dest, T);	1334 _movp(Dest, T);

1177 } break;	1335 } break;

1178 case InstArithmetic::Fdiv: {	1336 case InstArithmetic::Fdiv: {

1179 Variable *T = makeReg(Dest->getType());	1337 Variable *T = makeReg(Dest->getType());

1180 _movp(T, Src0);	1338 _movp(T, Src0);

1181 _divps(T, Src1);	1339 _divps(T, legalizeToVar(Src1));

1182 _movp(Dest, T);	1340 _movp(Dest, T);

1183 } break;	1341 } break;

1184 case InstArithmetic::Frem: {	1342 case InstArithmetic::Frem: {

1185 const SizeT MaxSrcs = 2;	1343 const SizeT MaxSrcs = 2;

1186 InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs);	1344 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);

1187 Call->addArg(Src0);	1345 Call->addArg(Src0);

1188 Call->addArg(Src1);	1346 Call->addArg(Src1);

1189 lowerCall(Call);	1347 lowerCall(Call);

1190 } break;	1348 } break;

1191 }	1349 }

1192 } else { // Dest->getType() is non-i64 scalar	1350 } else { // Dest->getType() is non-i64 scalar

1193 Variable *T_edx = NULL;	1351 Variable *T_edx = NULL;

1194 Variable *T = NULL;	1352 Variable *T = NULL;

1195 switch (Inst->getOp()) {	1353 switch (Inst->getOp()) {

1196 case InstArithmetic::_num:	1354 case InstArithmetic::_num:

(...skipping 1873 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3070 for (SizeT i = 0; i < Size; ++i) {	3228 for (SizeT i = 0; i < Size; ++i) {

3071 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";	3229 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";

3072 }	3230 }

3073 Str << "\t.size\t" << MangledName << ", " << Size << "\n";	3231 Str << "\t.size\t" << MangledName << ", " << Size << "\n";

3074 }	3232 }

3075 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName	3233 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName

3076 << "\n";	3234 << "\n";

3077 }	3235 }

3078	3236

3079 } // end of namespace Ice	3237 } // end of namespace Ice

OLD	NEW

« crosstest/test_arith_main.cpp ('K') | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-arith.ll » ('j') | no next file with comments »