Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(409)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 397833002: Lower the rest of the vector arithmetic operations. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Rebase Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-arith.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
83 size_t Index = static_cast<size_t>(Cond); 83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size); 84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping; 85 return TableIcmp32[Index].Mapping;
86 } 86 }
87 87
88 // The maximum number of arguments to pass in XMM registers 88 // The maximum number of arguments to pass in XMM registers
89 const unsigned X86_MAX_XMM_ARGS = 4; 89 const unsigned X86_MAX_XMM_ARGS = 4;
90 // The number of bits in a byte 90 // The number of bits in a byte
91 const unsigned X86_CHAR_BIT = 8; 91 const unsigned X86_CHAR_BIT = 8;
92 92
93 // Return a string representation of the type that is suitable for use
94 // in an identifier.
95 IceString typeIdentString(const Type Ty) {
96 IceString Str;
97 llvm::raw_string_ostream BaseOS(Str);
98 Ostream OS(&BaseOS);
99 if (isVectorType(Ty)) {
100 OS << "v" << typeNumElements(Ty) << typeElementType(Ty);
101 } else {
102 OS << Ty;
103 }
104 return BaseOS.str();
105 }
106
93 // In some cases, there are x-macros tables for both high-level and 107 // In some cases, there are x-macros tables for both high-level and
94 // low-level instructions/operands that use the same enum key value. 108 // low-level instructions/operands that use the same enum key value.
95 // The tables are kept separate to maintain a proper separation 109 // The tables are kept separate to maintain a proper separation
96 // between abstraction layers. There is a risk that the tables 110 // between abstraction layers. There is a risk that the tables
97 // could get out of sync if enum values are reordered or if entries 111 // could get out of sync if enum values are reordered or if entries
98 // are added or deleted. This dummy function uses static_assert to 112 // are added or deleted. This dummy function uses static_assert to
99 // ensure everything is kept in sync. 113 // ensure everything is kept in sync.
100 void xMacroIntegrityCheck() { 114 void xMacroIntegrityCheck() {
101 // Validate the enum values in FCMPX8632_TABLE. 115 // Validate the enum values in FCMPX8632_TABLE.
102 { 116 {
(...skipping 1029 matching lines...) Expand 10 before | Expand all | Expand 10 after
1132 } break; 1146 } break;
1133 case InstArithmetic::Fadd: 1147 case InstArithmetic::Fadd:
1134 case InstArithmetic::Fsub: 1148 case InstArithmetic::Fsub:
1135 case InstArithmetic::Fmul: 1149 case InstArithmetic::Fmul:
1136 case InstArithmetic::Fdiv: 1150 case InstArithmetic::Fdiv:
1137 case InstArithmetic::Frem: 1151 case InstArithmetic::Frem:
1138 llvm_unreachable("FP instruction with i64 type"); 1152 llvm_unreachable("FP instruction with i64 type");
1139 break; 1153 break;
1140 } 1154 }
1141 } else if (isVectorType(Dest->getType())) { 1155 } else if (isVectorType(Dest->getType())) {
1156 // TODO: Trap on integer divide and integer modulo by zero.
1157 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
1158 //
1159 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
1160 // registers. This is a workaround for the fact that there is no
1161 // support for aligning stack operands. Once there is support,
1162 // remove LEGAL_HACK.
1163 #define LEGAL_HACK(s) legalizeToVar((s))
1142 switch (Inst->getOp()) { 1164 switch (Inst->getOp()) {
1143 case InstArithmetic::_num: 1165 case InstArithmetic::_num:
1144 llvm_unreachable("Unknown arithmetic operator"); 1166 llvm_unreachable("Unknown arithmetic operator");
1145 break; 1167 break;
1146 case InstArithmetic::Add: 1168 case InstArithmetic::Add: {
1147 case InstArithmetic::And: 1169 Variable *T = makeReg(Dest->getType());
1148 case InstArithmetic::Or: 1170 _movp(T, Src0);
1149 case InstArithmetic::Xor: 1171 _padd(T, LEGAL_HACK(Src1));
1150 case InstArithmetic::Sub: 1172 _movp(Dest, T);
1151 case InstArithmetic::Mul: 1173 } break;
1152 case InstArithmetic::Shl: 1174 case InstArithmetic::And: {
1153 case InstArithmetic::Lshr: 1175 Variable *T = makeReg(Dest->getType());
1154 case InstArithmetic::Ashr: 1176 _movp(T, Src0);
1155 case InstArithmetic::Udiv: 1177 _pand(T, LEGAL_HACK(Src1));
1156 case InstArithmetic::Sdiv: 1178 _movp(Dest, T);
1157 case InstArithmetic::Urem: 1179 } break;
1158 case InstArithmetic::Srem: 1180 case InstArithmetic::Or: {
1159 // TODO(wala): Handle these. 1181 Variable *T = makeReg(Dest->getType());
1160 Func->setError("Unhandled instruction"); 1182 _movp(T, Src0);
1161 break; 1183 _por(T, LEGAL_HACK(Src1));
1184 _movp(Dest, T);
1185 } break;
1186 case InstArithmetic::Xor: {
1187 Variable *T = makeReg(Dest->getType());
1188 _movp(T, Src0);
1189 _pxor(T, LEGAL_HACK(Src1));
1190 _movp(Dest, T);
1191 } break;
1192 case InstArithmetic::Sub: {
1193 Variable *T = makeReg(Dest->getType());
1194 _movp(T, Src0);
1195 _psub(T, LEGAL_HACK(Src1));
1196 _movp(Dest, T);
1197 } break;
1198 case InstArithmetic::Mul: {
1199 if (Dest->getType() == IceType_v4i32) {
1200 // Lowering sequence:
1201 // Note: The mask arguments have index 0 on the left.
1202 //
1203 // movups T1, Src0
1204 // pshufd T2, Src0, {1,0,3,0}
1205 // pshufd T3, Src1, {1,0,3,0}
1206 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1207 // pmuludq T1, Src1
1208 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1209 // pmuludq T2, T3
1210 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1211 // shufps T1, T2, {0,2,0,2}
1212 // pshufd T4, T1, {0,2,1,3}
1213 // movups Dest, T4
1214 //
1215 // TODO(wala): SSE4.1 has pmulld.
1216
1217 // Mask that directs pshufd to create a vector with entries
1218 // Src[1, 0, 3, 0]
1219 const unsigned Constant1030 = 0x31;
1220 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);
1221 // Mask that directs shufps to create a vector with entries
1222 // Dest[0, 2], Src[0, 2]
1223 const unsigned Mask0202 = 0x88;
1224 // Mask that directs pshufd to create a vector with entries
1225 // Src[0, 2, 1, 3]
1226 const unsigned Mask0213 = 0xd8;
1227 Variable *T1 = makeReg(IceType_v4i32);
1228 Variable *T2 = makeReg(IceType_v4i32);
1229 Variable *T3 = makeReg(IceType_v4i32);
1230 Variable *T4 = makeReg(IceType_v4i32);
1231 _movp(T1, Src0);
1232 // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R
1233 // with Src1 after stack operand alignment support is
1234 // implemented.
1235 Variable *Src0R = LEGAL_HACK(Src0);
1236 Variable *Src1R = LEGAL_HACK(Src1);
1237 _pshufd(T2, Src0R, Mask1030);
1238 _pshufd(T3, Src1R, Mask1030);
1239 _pmuludq(T1, Src1R);
1240 _pmuludq(T2, T3);
1241 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
1242 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
1243 _movp(Dest, T4);
1244 } else if (Dest->getType() == IceType_v8i16) {
1245 Variable *T = makeReg(IceType_v8i16);
1246 _movp(T, Src0);
1247 _pmullw(T, legalizeToVar(Src1));
1248 _movp(Dest, T);
1249 } else {
1250 assert(Dest->getType() == IceType_v16i8);
1251 // Sz_mul_v16i8
1252 const IceString Helper = "Sz_mul_v16i8";
1253 const SizeT MaxSrcs = 2;
1254 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1255 Call->addArg(Src0);
1256 Call->addArg(Src1);
1257 lowerCall(Call);
1258 }
1259 } break;
1260 case InstArithmetic::Shl: {
1261 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8
1262 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());
1263 const SizeT MaxSrcs = 2;
1264 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1265 Call->addArg(Src0);
1266 Call->addArg(Src1);
1267 lowerCall(Call);
1268 } break;
1269 case InstArithmetic::Lshr: {
1270 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
1271 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
1272 const SizeT MaxSrcs = 2;
1273 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1274 Call->addArg(Src0);
1275 Call->addArg(Src1);
1276 lowerCall(Call);
1277 } break;
1278 case InstArithmetic::Ashr: {
1279 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8
1280 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());
1281 const SizeT MaxSrcs = 2;
1282 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1283 Call->addArg(Src0);
1284 Call->addArg(Src1);
1285 lowerCall(Call);
1286 } break;
1287 case InstArithmetic::Udiv: {
1288 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8
1289 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());
1290 const SizeT MaxSrcs = 2;
1291 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1292 Call->addArg(Src0);
1293 Call->addArg(Src1);
1294 lowerCall(Call);
1295 } break;
1296 case InstArithmetic::Sdiv: {
1297 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8
1298 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());
1299 const SizeT MaxSrcs = 2;
1300 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1301 Call->addArg(Src0);
1302 Call->addArg(Src1);
1303 lowerCall(Call);
1304 } break;
1305 case InstArithmetic::Urem: {
1306 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8
1307 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());
1308 const SizeT MaxSrcs = 2;
1309 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1310 Call->addArg(Src0);
1311 Call->addArg(Src1);
1312 lowerCall(Call);
1313 } break;
1314 case InstArithmetic::Srem: {
1315 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8
1316 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());
1317 const SizeT MaxSrcs = 2;
1318 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1319 Call->addArg(Src0);
1320 Call->addArg(Src1);
1321 lowerCall(Call);
1322 } break;
1162 case InstArithmetic::Fadd: { 1323 case InstArithmetic::Fadd: {
1163 Variable *T = makeReg(Dest->getType()); 1324 Variable *T = makeReg(Dest->getType());
1164 _movp(T, Src0); 1325 _movp(T, Src0);
1165 _addps(T, Src1); 1326 _addps(T, LEGAL_HACK(Src1));
1166 _movp(Dest, T); 1327 _movp(Dest, T);
1167 } break; 1328 } break;
1168 case InstArithmetic::Fsub: { 1329 case InstArithmetic::Fsub: {
1169 Variable *T = makeReg(Dest->getType()); 1330 Variable *T = makeReg(Dest->getType());
1170 _movp(T, Src0); 1331 _movp(T, Src0);
1171 _subps(T, Src1); 1332 _subps(T, LEGAL_HACK(Src1));
1172 _movp(Dest, T); 1333 _movp(Dest, T);
1173 } break; 1334 } break;
1174 case InstArithmetic::Fmul: { 1335 case InstArithmetic::Fmul: {
1175 Variable *T = makeReg(Dest->getType()); 1336 Variable *T = makeReg(Dest->getType());
1176 _movp(T, Src0); 1337 _movp(T, Src0);
1177 _mulps(T, Src1); 1338 _mulps(T, LEGAL_HACK(Src1));
1178 _movp(Dest, T); 1339 _movp(Dest, T);
1179 } break; 1340 } break;
1180 case InstArithmetic::Fdiv: { 1341 case InstArithmetic::Fdiv: {
1181 Variable *T = makeReg(Dest->getType()); 1342 Variable *T = makeReg(Dest->getType());
1182 _movp(T, Src0); 1343 _movp(T, Src0);
1183 _divps(T, Src1); 1344 _divps(T, LEGAL_HACK(Src1));
1184 _movp(Dest, T); 1345 _movp(Dest, T);
1185 } break; 1346 } break;
1186 case InstArithmetic::Frem: { 1347 case InstArithmetic::Frem: {
1187 const SizeT MaxSrcs = 2; 1348 const SizeT MaxSrcs = 2;
1188 InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs); 1349 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);
1189 Call->addArg(Src0); 1350 Call->addArg(Src0);
1190 Call->addArg(Src1); 1351 Call->addArg(Src1);
1191 lowerCall(Call); 1352 lowerCall(Call);
1192 } break; 1353 } break;
1193 } 1354 }
1355 #undef LEGAL_HACK
1194 } else { // Dest->getType() is non-i64 scalar 1356 } else { // Dest->getType() is non-i64 scalar
1195 Variable *T_edx = NULL; 1357 Variable *T_edx = NULL;
1196 Variable *T = NULL; 1358 Variable *T = NULL;
1197 switch (Inst->getOp()) { 1359 switch (Inst->getOp()) {
1198 case InstArithmetic::_num: 1360 case InstArithmetic::_num:
1199 llvm_unreachable("Unknown arithmetic operator"); 1361 llvm_unreachable("Unknown arithmetic operator");
1200 break; 1362 break;
1201 case InstArithmetic::Add: 1363 case InstArithmetic::Add:
1202 _mov(T, Src0); 1364 _mov(T, Src0);
1203 _add(T, Src1); 1365 _add(T, Src1);
(...skipping 2117 matching lines...) Expand 10 before | Expand all | Expand 10 after
3321 for (SizeT i = 0; i < Size; ++i) { 3483 for (SizeT i = 0; i < Size; ++i) {
3322 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 3484 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
3323 } 3485 }
3324 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 3486 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
3325 } 3487 }
3326 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 3488 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
3327 << "\n"; 3489 << "\n";
3328 } 3490 }
3329 3491
3330 } // end of namespace Ice 3492 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-arith.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698