Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(410)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 397833002: Lower the rest of the vector arithmetic operations. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Format crosstest.py Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
81 81
82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { 82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond); 83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size); 84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping; 85 return TableIcmp32[Index].Mapping;
86 } 86 }
87 87
88 // The maximum number of arguments to pass in XMM registers 88 // The maximum number of arguments to pass in XMM registers
89 const unsigned X86_MAX_XMM_ARGS = 4; 89 const unsigned X86_MAX_XMM_ARGS = 4;
90 90
91 // Return a string representation of the type that is suitable for use
92 // in an identifier.
93 IceString typeIdentString(const Type Ty) {
Jim Stichnoth 2014/07/16 19:17:10 Put inside an anonymous namespace
wala 2014/07/17 01:34:53 This is already inside an anonymous namespace.
Jim Stichnoth 2014/07/17 13:03:13 D'oh! Sorry!
94 IceString Str;
95 llvm::raw_string_ostream BaseOS(Str);
96 Ostream OS(&BaseOS);
97 if (isVectorType(Ty)) {
98 OS << "v" << typeNumElements(Ty) << typeElementType(Ty);
99 } else {
100 OS << Ty;
101 }
102 return BaseOS.str();
103 }
104
91 // In some cases, there are x-macros tables for both high-level and 105 // In some cases, there are x-macros tables for both high-level and
92 // low-level instructions/operands that use the same enum key value. 106 // low-level instructions/operands that use the same enum key value.
93 // The tables are kept separate to maintain a proper separation 107 // The tables are kept separate to maintain a proper separation
94 // between abstraction layers. There is a risk that the tables 108 // between abstraction layers. There is a risk that the tables
95 // could get out of sync if enum values are reordered or if entries 109 // could get out of sync if enum values are reordered or if entries
96 // are added or deleted. This dummy function uses static_assert to 110 // are added or deleted. This dummy function uses static_assert to
97 // ensure everything is kept in sync. 111 // ensure everything is kept in sync.
98 void xMacroIntegrityCheck() { 112 void xMacroIntegrityCheck() {
99 // Validate the enum values in FCMPX8632_TABLE. 113 // Validate the enum values in FCMPX8632_TABLE.
100 { 114 {
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
150 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); 164 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
151 ICEINSTICMP_TABLE; 165 ICEINSTICMP_TABLE;
152 #undef X 166 #undef X
153 } 167 }
154 168
155 // Validate the enum values in ICETYPEX8632_TABLE. 169 // Validate the enum values in ICETYPEX8632_TABLE.
156 { 170 {
157 // Define a temporary set of enum values based on low-level 171 // Define a temporary set of enum values based on low-level
158 // table entries. 172 // table entries.
159 enum _tmp_enum { 173 enum _tmp_enum {
160 #define X(tag, cvt, sdss, width) _tmp_##tag, 174 #define X(tag, cvt, sdss, pack, width) _tmp_##tag,
161 ICETYPEX8632_TABLE 175 ICETYPEX8632_TABLE
162 #undef X 176 #undef X
163 _num 177 _num
164 }; 178 };
165 // Define a set of constants based on high-level table entries. 179 // Define a set of constants based on high-level table entries.
166 #define X(tag, size, align, elts, elty, str) \ 180 #define X(tag, size, align, elts, elty, str) \
167 static const int _table1_##tag = tag; 181 static const int _table1_##tag = tag;
168 ICETYPE_TABLE; 182 ICETYPE_TABLE;
169 #undef X 183 #undef X
170 // Define a set of constants based on low-level table entries, 184 // Define a set of constants based on low-level table entries,
171 // and ensure the table entry keys are consistent. 185 // and ensure the table entry keys are consistent.
172 #define X(tag, cvt, sdss, width) \ 186 #define X(tag, cvt, sdss, pack, width) \
173 static const int _table2_##tag = _tmp_##tag; \ 187 static const int _table2_##tag = _tmp_##tag; \
174 STATIC_ASSERT(_table1_##tag == _table2_##tag); 188 STATIC_ASSERT(_table1_##tag == _table2_##tag);
175 ICETYPEX8632_TABLE; 189 ICETYPEX8632_TABLE;
176 #undef X 190 #undef X
177 // Repeat the static asserts with respect to the high-level 191 // Repeat the static asserts with respect to the high-level
178 // table entries in case the high-level table has extra entries. 192 // table entries in case the high-level table has extra entries.
179 #define X(tag, size, align, elts, elty, str) \ 193 #define X(tag, size, align, elts, elty, str) \
180 STATIC_ASSERT(_table1_##tag == _table2_##tag); 194 STATIC_ASSERT(_table1_##tag == _table2_##tag);
181 ICETYPE_TABLE; 195 ICETYPE_TABLE;
182 #undef X 196 #undef X
(...skipping 947 matching lines...) Expand 10 before | Expand all | Expand 10 after
1130 } break; 1144 } break;
1131 case InstArithmetic::Fadd: 1145 case InstArithmetic::Fadd:
1132 case InstArithmetic::Fsub: 1146 case InstArithmetic::Fsub:
1133 case InstArithmetic::Fmul: 1147 case InstArithmetic::Fmul:
1134 case InstArithmetic::Fdiv: 1148 case InstArithmetic::Fdiv:
1135 case InstArithmetic::Frem: 1149 case InstArithmetic::Frem:
1136 llvm_unreachable("FP instruction with i64 type"); 1150 llvm_unreachable("FP instruction with i64 type");
1137 break; 1151 break;
1138 } 1152 }
1139 } else if (isVectorType(Dest->getType())) { 1153 } else if (isVectorType(Dest->getType())) {
1154 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
1155 // registers. This is a workaround of the fact that there is no
1156 // support for aligning stack operands. Once alignment support is
1157 // implemented, replace legalizeToVar(Src1) with Src1.
Jim Stichnoth 2014/07/16 19:17:10 I'm not adamant about this, but I think it might b
wala 2014/07/17 01:34:54 Good idea. Done.
1158 //
1159 // TODO: Trap on divide and modulo by zero.
Jim Stichnoth 2014/07/16 19:17:10 I believe this should only trap for integer div/mo
wala 2014/07/17 01:34:53 Done.
1160 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
1140 switch (Inst->getOp()) { 1161 switch (Inst->getOp()) {
1141 case InstArithmetic::_num: 1162 case InstArithmetic::_num:
1142 llvm_unreachable("Unknown arithmetic operator"); 1163 llvm_unreachable("Unknown arithmetic operator");
1143 break; 1164 break;
1144 case InstArithmetic::Add: 1165 case InstArithmetic::Add: {
1145 case InstArithmetic::And: 1166 Variable *T = makeReg(Dest->getType());
1146 case InstArithmetic::Or: 1167 _movp(T, Src0);
1147 case InstArithmetic::Xor: 1168 _padd(T, legalizeToVar(Src1));
1148 case InstArithmetic::Sub: 1169 _movp(Dest, T);
1149 case InstArithmetic::Mul: 1170 } break;
1150 case InstArithmetic::Shl: 1171 case InstArithmetic::And: {
1151 case InstArithmetic::Lshr: 1172 Variable *T = makeReg(Dest->getType());
1152 case InstArithmetic::Ashr: 1173 _movp(T, Src0);
1153 case InstArithmetic::Udiv: 1174 _pand(T, legalizeToVar(Src1));
1154 case InstArithmetic::Sdiv: 1175 _movp(Dest, T);
1155 case InstArithmetic::Urem: 1176 } break;
1156 case InstArithmetic::Srem: 1177 case InstArithmetic::Or: {
1157 // TODO(wala): Handle these. 1178 Variable *T = makeReg(Dest->getType());
1158 Func->setError("Unhandled instruction"); 1179 _movp(T, Src0);
1159 break; 1180 _por(T, legalizeToVar(Src1));
1181 _movp(Dest, T);
1182 } break;
1183 case InstArithmetic::Xor: {
1184 Variable *T = makeReg(Dest->getType());
1185 _movp(T, Src0);
1186 _pxor(T, legalizeToVar(Src1));
1187 _movp(Dest, T);
1188 } break;
1189 case InstArithmetic::Sub: {
1190 Variable *T = makeReg(Dest->getType());
1191 _movp(T, Src0);
1192 _psub(T, legalizeToVar(Src1));
1193 _movp(Dest, T);
1194 } break;
1195 case InstArithmetic::Mul: {
1196 if (Dest->getType() == IceType_v4i32) {
1197 // Lowering sequence:
1198 // movups T1, Src0
1199 // pshufd T2, Src0, [1,0,3,0]
jvoung (off chromium) 2014/07/16 19:23:26 nit: The ordering of the vector ([1, 0, 3, 0]) is
wala 2014/07/17 01:34:53 I write vectors as if they were arrays in the comm
jvoung (off chromium) 2014/07/17 15:00:38 Okay, that helps some.
1200 // pshufd T3, Src1, [1,0,3,0]
1201 // # T1 = { Src0[0] * Src1[0], Src0[2] * Src1[2] }
1202 // pmuludq T1, Src1
1203 // # T2 = { Src0[1] * Src1[1], Src0[3] * Src1[3] }
1204 // pmuludq T2, T3
1205 // # T1 = { lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2]) }
1206 // shufps T1, T2, [0,2,0,2]
1207 // pshufd T4, T1, [0,2,1,3]
1208 // movups Dest, T4
1209 //
1210 // TODO(wala): SSE4.1 has pmulld.
1211
1212 // Mask that directs pshufd to create a vector with entries
1213 // Src[1, 0, 3, 0]
1214 const unsigned Constant1030 = 0x31;
1215 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);
1216 // Mask that directs shufps to create a vector with entries
1217 // Dest[0, 2], Src[0, 2]
1218 const unsigned Mask0202 = 0x88;
1219 // Mask that directs pshufd to create a vector with entries
1220 // Src[0, 2, 1, 3]
1221 const unsigned Mask0213 = 0xd8;
1222 Variable *T1 = makeReg(IceType_v4i32);
1223 Variable *T2 = makeReg(IceType_v4i32);
1224 Variable *T3 = makeReg(IceType_v4i32);
1225 Variable *T4 = makeReg(IceType_v4i32);
1226 _movp(T1, Src0);
1227 // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R
1228 // with Src1 after stack operand alignment support is
1229 // implemented.
1230 Variable *Src0R = legalizeToVar(Src0);
1231 Variable *Src1R = legalizeToVar(Src1);
1232 _pshufd(T2, Src0R, Mask1030);
1233 _pshufd(T3, Src1R, Mask1030);
1234 _pmuludq(T1, Src1R);
1235 _pmuludq(T2, T3);
1236 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
1237 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
1238 _movp(Dest, T4);
1239 } else if (Dest->getType() == IceType_v8i16) {
1240 Variable *T = makeReg(IceType_v8i16);
1241 _movp(T, Src0);
1242 _pmullw(T, legalizeToVar(Src1));
1243 _movp(Dest, T);
1244 } else {
1245 assert(Dest->getType() == IceType_v16i8);
1246 // Sz_mul_v16i8
1247 const IceString Helper = "Sz_mul_v16i8";
1248 const SizeT MaxSrcs = 2;
1249 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1250 Call->addArg(Src0);
1251 Call->addArg(Src1);
1252 lowerCall(Call);
1253 }
1254 } break;
1255 case InstArithmetic::Shl: {
1256 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8
1257 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());
1258 const SizeT MaxSrcs = 2;
1259 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1260 Call->addArg(Src0);
1261 Call->addArg(Src1);
1262 lowerCall(Call);
1263 } break;
1264 case InstArithmetic::Lshr: {
1265 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
1266 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
1267 const SizeT MaxSrcs = 2;
1268 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1269 Call->addArg(Src0);
1270 Call->addArg(Src1);
1271 lowerCall(Call);
1272 } break;
1273 case InstArithmetic::Ashr: {
1274 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8
1275 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());
1276 const SizeT MaxSrcs = 2;
1277 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1278 Call->addArg(Src0);
1279 Call->addArg(Src1);
1280 lowerCall(Call);
1281 } break;
1282 case InstArithmetic::Udiv: {
1283 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8
1284 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());
1285 const SizeT MaxSrcs = 2;
1286 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1287 Call->addArg(Src0);
1288 Call->addArg(Src1);
1289 lowerCall(Call);
1290 } break;
1291 case InstArithmetic::Sdiv: {
1292 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8
1293 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());
1294 const SizeT MaxSrcs = 2;
1295 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1296 Call->addArg(Src0);
1297 Call->addArg(Src1);
1298 lowerCall(Call);
1299 } break;
1300 case InstArithmetic::Urem: {
1301 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8
1302 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());
1303 const SizeT MaxSrcs = 2;
1304 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1305 Call->addArg(Src0);
1306 Call->addArg(Src1);
1307 lowerCall(Call);
1308 } break;
1309 case InstArithmetic::Srem: {
1310 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8
1311 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());
1312 const SizeT MaxSrcs = 2;
1313 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1314 Call->addArg(Src0);
1315 Call->addArg(Src1);
1316 lowerCall(Call);
1317 } break;
1160 case InstArithmetic::Fadd: { 1318 case InstArithmetic::Fadd: {
1161 Variable *T = makeReg(Dest->getType()); 1319 Variable *T = makeReg(Dest->getType());
1162 _movp(T, Src0); 1320 _movp(T, Src0);
1163 _addps(T, Src1); 1321 _addps(T, legalizeToVar(Src1));
1164 _movp(Dest, T); 1322 _movp(Dest, T);
1165 } break; 1323 } break;
1166 case InstArithmetic::Fsub: { 1324 case InstArithmetic::Fsub: {
1167 Variable *T = makeReg(Dest->getType()); 1325 Variable *T = makeReg(Dest->getType());
1168 _movp(T, Src0); 1326 _movp(T, Src0);
1169 _subps(T, Src1); 1327 _subps(T, legalizeToVar(Src1));
1170 _movp(Dest, T); 1328 _movp(Dest, T);
1171 } break; 1329 } break;
1172 case InstArithmetic::Fmul: { 1330 case InstArithmetic::Fmul: {
1173 Variable *T = makeReg(Dest->getType()); 1331 Variable *T = makeReg(Dest->getType());
1174 _movp(T, Src0); 1332 _movp(T, Src0);
1175 _mulps(T, Src1); 1333 _mulps(T, legalizeToVar(Src1));
1176 _movp(Dest, T); 1334 _movp(Dest, T);
1177 } break; 1335 } break;
1178 case InstArithmetic::Fdiv: { 1336 case InstArithmetic::Fdiv: {
1179 Variable *T = makeReg(Dest->getType()); 1337 Variable *T = makeReg(Dest->getType());
1180 _movp(T, Src0); 1338 _movp(T, Src0);
1181 _divps(T, Src1); 1339 _divps(T, legalizeToVar(Src1));
1182 _movp(Dest, T); 1340 _movp(Dest, T);
1183 } break; 1341 } break;
1184 case InstArithmetic::Frem: { 1342 case InstArithmetic::Frem: {
1185 const SizeT MaxSrcs = 2; 1343 const SizeT MaxSrcs = 2;
1186 InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs); 1344 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);
1187 Call->addArg(Src0); 1345 Call->addArg(Src0);
1188 Call->addArg(Src1); 1346 Call->addArg(Src1);
1189 lowerCall(Call); 1347 lowerCall(Call);
1190 } break; 1348 } break;
1191 } 1349 }
1192 } else { // Dest->getType() is non-i64 scalar 1350 } else { // Dest->getType() is non-i64 scalar
1193 Variable *T_edx = NULL; 1351 Variable *T_edx = NULL;
1194 Variable *T = NULL; 1352 Variable *T = NULL;
1195 switch (Inst->getOp()) { 1353 switch (Inst->getOp()) {
1196 case InstArithmetic::_num: 1354 case InstArithmetic::_num:
(...skipping 1873 matching lines...) Expand 10 before | Expand all | Expand 10 after
3070 for (SizeT i = 0; i < Size; ++i) { 3228 for (SizeT i = 0; i < Size; ++i) {
3071 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 3229 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
3072 } 3230 }
3073 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 3231 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
3074 } 3232 }
3075 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 3233 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
3076 << "\n"; 3234 << "\n";
3077 } 3235 }
3078 3236
3079 } // end of namespace Ice 3237 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698