src/IceTargetLoweringX8632.cpp - Issue 397833002: Lower the rest of the vector arithmetic operations.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 397833002: Lower the rest of the vector arithmetic operations. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Rebase Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

(...skipping 72 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
83 size_t Index = static_cast<size_t>(Cond);	83 size_t Index = static_cast<size_t>(Cond);

84 assert(Index < TableIcmp32Size);	84 assert(Index < TableIcmp32Size);

85 return TableIcmp32[Index].Mapping;	85 return TableIcmp32[Index].Mapping;

86 }	86 }

87	87

88 // The maximum number of arguments to pass in XMM registers	88 // The maximum number of arguments to pass in XMM registers

89 const unsigned X86_MAX_XMM_ARGS = 4;	89 const unsigned X86_MAX_XMM_ARGS = 4;

90 // The number of bits in a byte	90 // The number of bits in a byte

91 const unsigned X86_CHAR_BIT = 8;	91 const unsigned X86_CHAR_BIT = 8;

92	92

	93 // Return a string representation of the type that is suitable for use

	94 // in an identifier.

	95 IceString typeIdentString(const Type Ty) {

	96 IceString Str;

	97 llvm::raw_string_ostream BaseOS(Str);

	98 Ostream OS(&BaseOS);

	99 if (isVectorType(Ty)) {

	100 OS << "v" << typeNumElements(Ty) << typeElementType(Ty);

	101 } else {

	102 OS << Ty;

	103 }

	104 return BaseOS.str();

	105 }

	106

93 // In some cases, there are x-macros tables for both high-level and	107 // In some cases, there are x-macros tables for both high-level and

94 // low-level instructions/operands that use the same enum key value.	108 // low-level instructions/operands that use the same enum key value.

95 // The tables are kept separate to maintain a proper separation	109 // The tables are kept separate to maintain a proper separation

96 // between abstraction layers. There is a risk that the tables	110 // between abstraction layers. There is a risk that the tables

97 // could get out of sync if enum values are reordered or if entries	111 // could get out of sync if enum values are reordered or if entries

98 // are added or deleted. This dummy function uses static_assert to	112 // are added or deleted. This dummy function uses static_assert to

99 // ensure everything is kept in sync.	113 // ensure everything is kept in sync.

100 void xMacroIntegrityCheck() {	114 void xMacroIntegrityCheck() {

101 // Validate the enum values in FCMPX8632_TABLE.	115 // Validate the enum values in FCMPX8632_TABLE.

102 {	116 {

(...skipping 1029 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1132 } break;	1146 } break;

1133 case InstArithmetic::Fadd:	1147 case InstArithmetic::Fadd:

1134 case InstArithmetic::Fsub:	1148 case InstArithmetic::Fsub:

1135 case InstArithmetic::Fmul:	1149 case InstArithmetic::Fmul:

1136 case InstArithmetic::Fdiv:	1150 case InstArithmetic::Fdiv:

1137 case InstArithmetic::Frem:	1151 case InstArithmetic::Frem:

1138 llvm_unreachable("FP instruction with i64 type");	1152 llvm_unreachable("FP instruction with i64 type");

1139 break;	1153 break;

1140 }	1154 }

1141 } else if (isVectorType(Dest->getType())) {	1155 } else if (isVectorType(Dest->getType())) {

	1156 // TODO: Trap on integer divide and integer modulo by zero.

	1157 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899

	1158 //

	1159 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in

	1160 // registers. This is a workaround for the fact that there is no

	1161 // support for aligning stack operands. Once there is support,

	1162 // remove LEGAL_HACK.

	1163 #define LEGAL_HACK(s) legalizeToVar((s))

1142 switch (Inst->getOp()) {	1164 switch (Inst->getOp()) {

1143 case InstArithmetic::_num:	1165 case InstArithmetic::_num:

1144 llvm_unreachable("Unknown arithmetic operator");	1166 llvm_unreachable("Unknown arithmetic operator");

1145 break;	1167 break;

1146 case InstArithmetic::Add:	1168 case InstArithmetic::Add: {

1147 case InstArithmetic::And:	1169 Variable *T = makeReg(Dest->getType());

1148 case InstArithmetic::Or:	1170 _movp(T, Src0);

1149 case InstArithmetic::Xor:	1171 _padd(T, LEGAL_HACK(Src1));

1150 case InstArithmetic::Sub:	1172 _movp(Dest, T);

1151 case InstArithmetic::Mul:	1173 } break;

1152 case InstArithmetic::Shl:	1174 case InstArithmetic::And: {

1153 case InstArithmetic::Lshr:	1175 Variable *T = makeReg(Dest->getType());

1154 case InstArithmetic::Ashr:	1176 _movp(T, Src0);

1155 case InstArithmetic::Udiv:	1177 _pand(T, LEGAL_HACK(Src1));

1156 case InstArithmetic::Sdiv:	1178 _movp(Dest, T);

1157 case InstArithmetic::Urem:	1179 } break;

1158 case InstArithmetic::Srem:	1180 case InstArithmetic::Or: {

1159 // TODO(wala): Handle these.	1181 Variable *T = makeReg(Dest->getType());

1160 Func->setError("Unhandled instruction");	1182 _movp(T, Src0);

1161 break;	1183 _por(T, LEGAL_HACK(Src1));

	1184 _movp(Dest, T);

	1185 } break;

	1186 case InstArithmetic::Xor: {

	1187 Variable *T = makeReg(Dest->getType());

	1188 _movp(T, Src0);

	1189 _pxor(T, LEGAL_HACK(Src1));

	1190 _movp(Dest, T);

	1191 } break;

	1192 case InstArithmetic::Sub: {

	1193 Variable *T = makeReg(Dest->getType());

	1194 _movp(T, Src0);

	1195 _psub(T, LEGAL_HACK(Src1));

	1196 _movp(Dest, T);

	1197 } break;

	1198 case InstArithmetic::Mul: {

	1199 if (Dest->getType() == IceType_v4i32) {

	1200 // Lowering sequence:

	1201 // Note: The mask arguments have index 0 on the left.

	1202 //

	1203 // movups T1, Src0

	1204 // pshufd T2, Src0, {1,0,3,0}

	1205 // pshufd T3, Src1, {1,0,3,0}

	1206 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}

	1207 // pmuludq T1, Src1

	1208 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}

	1209 // pmuludq T2, T3

	1210 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}

	1211 // shufps T1, T2, {0,2,0,2}

	1212 // pshufd T4, T1, {0,2,1,3}

	1213 // movups Dest, T4

	1214 //

	1215 // TODO(wala): SSE4.1 has pmulld.

	1216

	1217 // Mask that directs pshufd to create a vector with entries

	1218 // Src[1, 0, 3, 0]

	1219 const unsigned Constant1030 = 0x31;

	1220 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);

	1221 // Mask that directs shufps to create a vector with entries

	1222 // Dest[0, 2], Src[0, 2]

	1223 const unsigned Mask0202 = 0x88;

	1224 // Mask that directs pshufd to create a vector with entries

	1225 // Src[0, 2, 1, 3]

	1226 const unsigned Mask0213 = 0xd8;

	1227 Variable *T1 = makeReg(IceType_v4i32);

	1228 Variable *T2 = makeReg(IceType_v4i32);

	1229 Variable *T3 = makeReg(IceType_v4i32);

	1230 Variable *T4 = makeReg(IceType_v4i32);

	1231 _movp(T1, Src0);

	1232 // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R

	1233 // with Src1 after stack operand alignment support is

	1234 // implemented.

	1235 Variable *Src0R = LEGAL_HACK(Src0);

	1236 Variable *Src1R = LEGAL_HACK(Src1);

	1237 _pshufd(T2, Src0R, Mask1030);

	1238 _pshufd(T3, Src1R, Mask1030);

	1239 _pmuludq(T1, Src1R);

	1240 _pmuludq(T2, T3);

	1241 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));

	1242 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));

	1243 _movp(Dest, T4);

	1244 } else if (Dest->getType() == IceType_v8i16) {

	1245 Variable *T = makeReg(IceType_v8i16);

	1246 _movp(T, Src0);

	1247 _pmullw(T, legalizeToVar(Src1));

	1248 _movp(Dest, T);

	1249 } else {

	1250 assert(Dest->getType() == IceType_v16i8);

	1251 // Sz_mul_v16i8

	1252 const IceString Helper = "Sz_mul_v16i8";

	1253 const SizeT MaxSrcs = 2;

	1254 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1255 Call->addArg(Src0);

	1256 Call->addArg(Src1);

	1257 lowerCall(Call);

	1258 }

	1259 } break;

	1260 case InstArithmetic::Shl: {

	1261 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8

	1262 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());

	1263 const SizeT MaxSrcs = 2;

	1264 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1265 Call->addArg(Src0);

	1266 Call->addArg(Src1);

	1267 lowerCall(Call);

	1268 } break;

	1269 case InstArithmetic::Lshr: {

	1270 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8

	1271 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());

	1272 const SizeT MaxSrcs = 2;

	1273 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1274 Call->addArg(Src0);

	1275 Call->addArg(Src1);

	1276 lowerCall(Call);

	1277 } break;

	1278 case InstArithmetic::Ashr: {

	1279 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8

	1280 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());

	1281 const SizeT MaxSrcs = 2;

	1282 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1283 Call->addArg(Src0);

	1284 Call->addArg(Src1);

	1285 lowerCall(Call);

	1286 } break;

	1287 case InstArithmetic::Udiv: {

	1288 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8

	1289 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());

	1290 const SizeT MaxSrcs = 2;

	1291 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1292 Call->addArg(Src0);

	1293 Call->addArg(Src1);

	1294 lowerCall(Call);

	1295 } break;

	1296 case InstArithmetic::Sdiv: {

	1297 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8

	1298 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());

	1299 const SizeT MaxSrcs = 2;

	1300 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1301 Call->addArg(Src0);

	1302 Call->addArg(Src1);

	1303 lowerCall(Call);

	1304 } break;

	1305 case InstArithmetic::Urem: {

	1306 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8

	1307 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());

	1308 const SizeT MaxSrcs = 2;

	1309 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1310 Call->addArg(Src0);

	1311 Call->addArg(Src1);

	1312 lowerCall(Call);

	1313 } break;

	1314 case InstArithmetic::Srem: {

	1315 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8

	1316 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());

	1317 const SizeT MaxSrcs = 2;

	1318 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);

	1319 Call->addArg(Src0);

	1320 Call->addArg(Src1);

	1321 lowerCall(Call);

	1322 } break;

1162 case InstArithmetic::Fadd: {	1323 case InstArithmetic::Fadd: {

1163 Variable *T = makeReg(Dest->getType());	1324 Variable *T = makeReg(Dest->getType());

1164 _movp(T, Src0);	1325 _movp(T, Src0);

1165 _addps(T, Src1);	1326 _addps(T, LEGAL_HACK(Src1));

1166 _movp(Dest, T);	1327 _movp(Dest, T);

1167 } break;	1328 } break;

1168 case InstArithmetic::Fsub: {	1329 case InstArithmetic::Fsub: {

1169 Variable *T = makeReg(Dest->getType());	1330 Variable *T = makeReg(Dest->getType());

1170 _movp(T, Src0);	1331 _movp(T, Src0);

1171 _subps(T, Src1);	1332 _subps(T, LEGAL_HACK(Src1));

1172 _movp(Dest, T);	1333 _movp(Dest, T);

1173 } break;	1334 } break;

1174 case InstArithmetic::Fmul: {	1335 case InstArithmetic::Fmul: {

1175 Variable *T = makeReg(Dest->getType());	1336 Variable *T = makeReg(Dest->getType());

1176 _movp(T, Src0);	1337 _movp(T, Src0);

1177 _mulps(T, Src1);	1338 _mulps(T, LEGAL_HACK(Src1));

1178 _movp(Dest, T);	1339 _movp(Dest, T);

1179 } break;	1340 } break;

1180 case InstArithmetic::Fdiv: {	1341 case InstArithmetic::Fdiv: {

1181 Variable *T = makeReg(Dest->getType());	1342 Variable *T = makeReg(Dest->getType());

1182 _movp(T, Src0);	1343 _movp(T, Src0);

1183 _divps(T, Src1);	1344 _divps(T, LEGAL_HACK(Src1));

1184 _movp(Dest, T);	1345 _movp(Dest, T);

1185 } break;	1346 } break;

1186 case InstArithmetic::Frem: {	1347 case InstArithmetic::Frem: {

1187 const SizeT MaxSrcs = 2;	1348 const SizeT MaxSrcs = 2;

1188 InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs);	1349 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);

1189 Call->addArg(Src0);	1350 Call->addArg(Src0);

1190 Call->addArg(Src1);	1351 Call->addArg(Src1);

1191 lowerCall(Call);	1352 lowerCall(Call);

1192 } break;	1353 } break;

1193 }	1354 }

	1355 #undef LEGAL_HACK

1194 } else { // Dest->getType() is non-i64 scalar	1356 } else { // Dest->getType() is non-i64 scalar

1195 Variable *T_edx = NULL;	1357 Variable *T_edx = NULL;

1196 Variable *T = NULL;	1358 Variable *T = NULL;

1197 switch (Inst->getOp()) {	1359 switch (Inst->getOp()) {

1198 case InstArithmetic::_num:	1360 case InstArithmetic::_num:

1199 llvm_unreachable("Unknown arithmetic operator");	1361 llvm_unreachable("Unknown arithmetic operator");

1200 break;	1362 break;

1201 case InstArithmetic::Add:	1363 case InstArithmetic::Add:

1202 _mov(T, Src0);	1364 _mov(T, Src0);

1203 _add(T, Src1);	1365 _add(T, Src1);

(...skipping 2117 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3321 for (SizeT i = 0; i < Size; ++i) {	3483 for (SizeT i = 0; i < Size; ++i) {

3322 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";	3484 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";

3323 }	3485 }

3324 Str << "\t.size\t" << MangledName << ", " << Size << "\n";	3486 Str << "\t.size\t" << MangledName << ", " << Size << "\n";

3325 }	3487 }

3326 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName	3488 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName

3327 << "\n";	3489 << "\n";

3328 }	3490 }

3329	3491

3330 } // end of namespace Ice	3492 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-arith.ll » ('j') | no next file with comments »