Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1023)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 412593002: Lower icmp operations between vector values. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Pass -filetype=obj to llvm-mc. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 { 165 {
166 // Define a temporary set of enum values based on low-level 166 // Define a temporary set of enum values based on low-level
167 // table entries. 167 // table entries.
168 enum _tmp_enum { 168 enum _tmp_enum {
169 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, 169 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
170 ICMPX8632_TABLE 170 ICMPX8632_TABLE
171 #undef X 171 #undef X
172 _num 172 _num
173 }; 173 };
174 // Define a set of constants based on high-level table entries. 174 // Define a set of constants based on high-level table entries.
175 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; 175 #define X(tag, str, isunsigned) static const int _table1_##tag = InstIcmp::tag;
176 ICEINSTICMP_TABLE; 176 ICEINSTICMP_TABLE;
177 #undef X 177 #undef X
178 // Define a set of constants based on low-level table entries, 178 // Define a set of constants based on low-level table entries,
179 // and ensure the table entry keys are consistent. 179 // and ensure the table entry keys are consistent.
180 #define X(val, C_32, C1_64, C2_64, C3_64) \ 180 #define X(val, C_32, C1_64, C2_64, C3_64) \
181 static const int _table2_##val = _tmp_##val; \ 181 static const int _table2_##val = _tmp_##val; \
182 STATIC_ASSERT(_table1_##val == _table2_##val); 182 STATIC_ASSERT(_table1_##val == _table2_##val);
183 ICMPX8632_TABLE; 183 ICMPX8632_TABLE;
184 #undef X 184 #undef X
185 // Repeat the static asserts with respect to the high-level 185 // Repeat the static asserts with respect to the high-level
186 // table entries in case the high-level table has extra entries. 186 // table entries in case the high-level table has extra entries.
187 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); 187 #define X(tag, str, isunsigned) STATIC_ASSERT(_table1_##tag == _table2_##tag);
188 ICEINSTICMP_TABLE; 188 ICEINSTICMP_TABLE;
189 #undef X 189 #undef X
190 } 190 }
191 191
192 // Validate the enum values in ICETYPEX8632_TABLE. 192 // Validate the enum values in ICETYPEX8632_TABLE.
193 { 193 {
194 // Define a temporary set of enum values based on low-level 194 // Define a temporary set of enum values based on low-level
195 // table entries. 195 // table entries.
196 enum _tmp_enum { 196 enum _tmp_enum {
197 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag, 197 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag,
(...skipping 1074 matching lines...) Expand 10 before | Expand all | Expand 10 after
1272 // Sz_mul_v16i8 1272 // Sz_mul_v16i8
1273 const IceString Helper = "Sz_mul_v16i8"; 1273 const IceString Helper = "Sz_mul_v16i8";
1274 const SizeT MaxSrcs = 2; 1274 const SizeT MaxSrcs = 2;
1275 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); 1275 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1276 Call->addArg(Src0); 1276 Call->addArg(Src0);
1277 Call->addArg(Src1); 1277 Call->addArg(Src1);
1278 lowerCall(Call); 1278 lowerCall(Call);
1279 } 1279 }
1280 } break; 1280 } break;
1281 case InstArithmetic::Shl: { 1281 case InstArithmetic::Shl: {
1282 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 1282 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8
jvoung (off chromium) 2014/07/23 18:54:55 Could this have used _psll then, for i32 and i16 e
wala 2014/07/23 20:40:36 No. Unfortunately _psll shifts all the fields in t
1283 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); 1283 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());
1284 const SizeT MaxSrcs = 2; 1284 const SizeT MaxSrcs = 2;
1285 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); 1285 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1286 Call->addArg(Src0); 1286 Call->addArg(Src0);
1287 Call->addArg(Src1); 1287 Call->addArg(Src1);
1288 lowerCall(Call); 1288 lowerCall(Call);
1289 } break; 1289 } break;
1290 case InstArithmetic::Lshr: { 1290 case InstArithmetic::Lshr: {
1291 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8 1291 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
1292 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType()); 1292 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
(...skipping 961 matching lines...) Expand 10 before | Expand all | Expand 10 after
2254 _mov(Dest, NonDefault); 2254 _mov(Dest, NonDefault);
2255 Context.insert(Label); 2255 Context.insert(Label);
2256 } 2256 }
2257 } 2257 }
2258 2258
2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { 2259 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
2260 Operand *Src0 = legalize(Inst->getSrc(0)); 2260 Operand *Src0 = legalize(Inst->getSrc(0));
2261 Operand *Src1 = legalize(Inst->getSrc(1)); 2261 Operand *Src1 = legalize(Inst->getSrc(1));
2262 Variable *Dest = Inst->getDest(); 2262 Variable *Dest = Inst->getDest();
2263 2263
2264 // If Src1 is an immediate, or known to be a physical register, we can 2264 if (isVectorType(Dest->getType())) {
2265 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into 2265 Type Ty = Src0->getType();
2266 // a physical register. (Actually, either Src0 or Src1 can be chosen for 2266 // Promote i1 vectors to 128 bit integer vector types.
2267 // the physical register, but unfortunately we have to commit to one or 2267 if (typeElementType(Ty) == IceType_i1) {
2268 // the other before register allocation.) 2268 Type NewTy = IceType_NUM;
2269 bool IsSrc1ImmOrReg = false; 2269 switch (Ty) {
2270 if (llvm::isa<Constant>(Src1)) { 2270 default:
2271 IsSrc1ImmOrReg = true; 2271 llvm_unreachable("unexpected type");
2272 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 2272 break;
2273 if (Var->hasReg()) 2273 case IceType_v4i1:
2274 NewTy = IceType_v4i32;
2275 break;
2276 case IceType_v8i1:
2277 NewTy = IceType_v8i16;
2278 break;
2279 case IceType_v16i1:
2280 NewTy = IceType_v16i8;
2281 break;
2282 }
2283 Variable *NewSrc0 = Func->makeVariable(NewTy, Context.getNode());
2284 Variable *NewSrc1 = Func->makeVariable(NewTy, Context.getNode());
2285 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
2286 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
2287 Src0 = NewSrc0;
2288 Src1 = NewSrc1;
2289 Ty = NewTy;
2290 }
2291
2292 // SSE2 only has signed comparison operations. Transform unsigned
2293 // inputs in a manner that allows for the use of signed comparison
2294 // operations by flipping the high order bits.
2295 if (Inst->isUnsigned()) {
Jim Stichnoth 2014/07/23 17:29:28 I have a minor problem and a major problem with is
wala 2014/07/23 20:40:36 Done. Removed the unsigned attribute.
2296 Variable *T0 = makeReg(Ty);
2297 Variable *T1 = makeReg(Ty);
2298 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2299 _movp(T0, Src0);
2300 _pxor(T0, HighOrderBits);
2301 _movp(T1, Src1);
2302 _pxor(T1, HighOrderBits);
2303 Src0 = T0;
2304 Src1 = T1;
2305 }
2306
2307 // TODO: ALIGNHACK: Both operands to compare instructions need to be
2308 // in registers until stack alignment support is implemented. Once
2309 // there is support for stack alignment, LEGAL_HACK can be removed.
2310 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2311 Variable *T = makeReg(Ty);
2312 switch (Inst->getCondition()) {
2313 default:
2314 llvm_unreachable("unexpected condition");
2315 break;
2316 case InstIcmp::Eq: {
2317 _movp(T, Src0);
2318 _pcmpeq(T, LEGAL_HACK(Src1));
2319 } break;
2320 case InstIcmp::Ne: {
2321 _movp(T, Src0);
2322 _pcmpeq(T, LEGAL_HACK(Src1));
2323 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2324 _pxor(T, MinusOne);
2325 } break;
2326 case InstIcmp::Ugt:
2327 case InstIcmp::Sgt: {
2328 _movp(T, Src0);
2329 _pcmpgt(T, LEGAL_HACK(Src1));
2330 } break;
2331 case InstIcmp::Uge:
2332 case InstIcmp::Sge: {
2333 // !(Src1 > Src0)
2334 _movp(T, Src1);
2335 _pcmpgt(T, LEGAL_HACK(Src0));
2336 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2337 _pxor(T, MinusOne);
2338 } break;
2339 case InstIcmp::Ult:
2340 case InstIcmp::Slt: {
2341 _movp(T, Src1);
2342 _pcmpgt(T, LEGAL_HACK(Src0));
2343 } break;
2344 case InstIcmp::Ule:
2345 case InstIcmp::Sle: {
2346 // !(Src0 > Src1)
2347 _movp(T, Src0);
2348 _pcmpgt(T, LEGAL_HACK(Src1));
2349 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2350 _pxor(T, MinusOne);
2351 } break;
2352 }
2353 #undef LEGAL_HACK
2354
2355 _movp(Dest, T);
2356
2357 // The following pattern occurs often in lowered C and C++ code:
2358 //
2359 // %cmp = icmp pred <n x ty> %src0, %src1
2360 // %cmp.ext = sext <n x i1> %cmp to <n x ty>
2361 //
2362 // We can avoid the sext operation by copying the result from pcmpgt
2363 // and pcmpeq, which is already sign extended, to the result of the
2364 // sext operation.
2365 if (InstCast *NextCast =
2366 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
2367 if (NextCast->getCastKind() == InstCast::Sext &&
2368 NextCast->getSrc(0) == Dest) {
2369 _movp(NextCast->getDest(), T);
2370 // Skip over the instruction.
2371 NextCast->setDeleted();
2372 Context.advanceNext();
2373 }
2374 }
jvoung (off chromium) 2014/07/23 18:54:55 could this just return; and then the scalar versi
wala 2014/07/23 20:40:36 Done.
2375 } else {
2376 // If Src1 is an immediate, or known to be a physical register, we can
2377 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2378 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2379 // the physical register, but unfortunately we have to commit to one or
2380 // the other before register allocation.)
2381 bool IsSrc1ImmOrReg = false;
2382 if (llvm::isa<Constant>(Src1)) {
2274 IsSrc1ImmOrReg = true; 2383 IsSrc1ImmOrReg = true;
2275 } 2384 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
2385 if (Var->hasReg())
2386 IsSrc1ImmOrReg = true;
2387 }
2276 2388
2277 // Try to fuse a compare immediately followed by a conditional branch. This 2389 // Try to fuse a compare immediately followed by a conditional branch. This
2278 // is possible when the compare dest and the branch source operands are the 2390 // is possible when the compare dest and the branch source operands are the
2279 // same, and are their only uses. TODO: implement this optimization for i64. 2391 // same, and are their only uses. TODO: implement this optimization for
2280 if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { 2392 // i64.
2281 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && 2393 if (InstBr *NextBr =
2282 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { 2394 llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
2283 Operand *Src0New = 2395 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
2284 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); 2396 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
2285 _cmp(Src0New, Src1); 2397 Operand *Src0New =
2286 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), 2398 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
2287 NextBr->getTargetFalse()); 2399 _cmp(Src0New, Src1);
2288 // Skip over the following branch instruction. 2400 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
2289 NextBr->setDeleted(); 2401 NextBr->getTargetFalse());
2290 Context.advanceNext(); 2402 // Skip over the following branch instruction.
2403 NextBr->setDeleted();
2404 Context.advanceNext();
2405 return;
2406 }
2407 }
2408
2409 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2410 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2411 Constant *One = Ctx->getConstantInt(IceType_i32, 1);
2412 if (Src0->getType() == IceType_i64) {
2413 InstIcmp::ICond Condition = Inst->getCondition();
2414 size_t Index = static_cast<size_t>(Condition);
2415 assert(Index < TableIcmp64Size);
2416 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2417 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2418 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2419 InstX8632Label *Label = InstX8632Label::create(Func, this);
2420 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
2421 _cmp(loOperand(Src0), Src1LoRI);
2422 _br(InstX8632Br::Br_ne, Label);
2423 _cmp(hiOperand(Src0), Src1HiRI);
2424 _br(InstX8632Br::Br_ne, Label);
2425 Context.insert(InstFakeUse::create(Func, Dest));
2426 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
2427 Context.insert(Label);
2428 } else {
2429 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
2430 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
2431 _mov(Dest, One);
2432 _cmp(hiOperand(Src0), Src1HiRI);
2433 _br(TableIcmp64[Index].C1, LabelTrue);
2434 _br(TableIcmp64[Index].C2, LabelFalse);
2435 _cmp(loOperand(Src0), Src1LoRI);
2436 _br(TableIcmp64[Index].C3, LabelTrue);
2437 Context.insert(LabelFalse);
2438 Context.insert(InstFakeUse::create(Func, Dest));
2439 _mov(Dest, Zero);
2440 Context.insert(LabelTrue);
2441 }
2291 return; 2442 return;
2292 } 2443 }
2444
2445 // cmp b, c
2446 Operand *Src0New =
2447 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
2448 InstX8632Label *Label = InstX8632Label::create(Func, this);
2449 _cmp(Src0New, Src1);
2450 _mov(Dest, One);
2451 _br(getIcmp32Mapping(Inst->getCondition()), Label);
2452 Context.insert(InstFakeUse::create(Func, Dest));
2453 _mov(Dest, Zero);
2454 Context.insert(Label);
2293 } 2455 }
2294
2295 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2296 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2297 Constant *One = Ctx->getConstantInt(IceType_i32, 1);
2298 if (Src0->getType() == IceType_i64) {
2299 InstIcmp::ICond Condition = Inst->getCondition();
2300 size_t Index = static_cast<size_t>(Condition);
2301 assert(Index < TableIcmp64Size);
2302 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2303 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2304 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2305 InstX8632Label *Label = InstX8632Label::create(Func, this);
2306 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
2307 _cmp(loOperand(Src0), Src1LoRI);
2308 _br(InstX8632Br::Br_ne, Label);
2309 _cmp(hiOperand(Src0), Src1HiRI);
2310 _br(InstX8632Br::Br_ne, Label);
2311 Context.insert(InstFakeUse::create(Func, Dest));
2312 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
2313 Context.insert(Label);
2314 } else {
2315 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
2316 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
2317 _mov(Dest, One);
2318 _cmp(hiOperand(Src0), Src1HiRI);
2319 _br(TableIcmp64[Index].C1, LabelTrue);
2320 _br(TableIcmp64[Index].C2, LabelFalse);
2321 _cmp(loOperand(Src0), Src1LoRI);
2322 _br(TableIcmp64[Index].C3, LabelTrue);
2323 Context.insert(LabelFalse);
2324 Context.insert(InstFakeUse::create(Func, Dest));
2325 _mov(Dest, Zero);
2326 Context.insert(LabelTrue);
2327 }
2328 return;
2329 }
2330
2331 // cmp b, c
2332 Operand *Src0New =
2333 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
2334 InstX8632Label *Label = InstX8632Label::create(Func, this);
2335 _cmp(Src0New, Src1);
2336 _mov(Dest, One);
2337 _br(getIcmp32Mapping(Inst->getCondition()), Label);
2338 Context.insert(InstFakeUse::create(Func, Dest));
2339 _mov(Dest, Zero);
2340 Context.insert(Label);
2341 } 2456 }
2342 2457
2343 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { 2458 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
2344 Operand *SourceVectOperand = Inst->getSrc(0); 2459 Operand *SourceVectOperand = Inst->getSrc(0);
2345 Operand *ElementToInsert = Inst->getSrc(1); 2460 Operand *ElementToInsert = Inst->getSrc(1);
2346 ConstantInteger *ElementIndex = 2461 ConstantInteger *ElementIndex =
2347 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); 2462 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
2348 // Only constant indices are allowed in PNaCl IR. 2463 // Only constant indices are allowed in PNaCl IR.
2349 assert(ElementIndex); 2464 assert(ElementIndex);
2350 unsigned Index = ElementIndex->getValue(); 2465 unsigned Index = ElementIndex->getValue();
(...skipping 1040 matching lines...) Expand 10 before | Expand all | Expand 10 after
3391 _br(Inst->getLabelDefault()); 3506 _br(Inst->getLabelDefault());
3392 } 3507 }
3393 3508
3394 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { 3509 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
3395 const SizeT MaxSrcs = 0; 3510 const SizeT MaxSrcs = 0;
3396 Variable *Dest = NULL; 3511 Variable *Dest = NULL;
3397 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); 3512 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
3398 lowerCall(Call); 3513 lowerCall(Call);
3399 } 3514 }
3400 3515
3516 // There is no support for loading or emitting vector constants, so the
3517 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
3518 // etc. are initialized with register operations.
3519 //
3520 // TODO(wala): Add limited support for vector constants so that
3521 // complex initialization in registers is unnecessary.
3522
3401 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { 3523 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
3402 // There is no support for loading or emitting vector constants, so
3403 // this value is initialized using register operations.
3404 Variable *Reg = makeReg(Ty, RegNum); 3524 Variable *Reg = makeReg(Ty, RegNum);
3405 // Insert a FakeDef, since otherwise the live range of Reg might 3525 // Insert a FakeDef, since otherwise the live range of Reg might
3406 // be overestimated. 3526 // be overestimated.
3407 Context.insert(InstFakeDef::create(Func, Reg)); 3527 Context.insert(InstFakeDef::create(Func, Reg));
3408 _pxor(Reg, Reg); 3528 _pxor(Reg, Reg);
3409 return Reg; 3529 return Reg;
3410 } 3530 }
3411 3531
3532 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
3533 Variable *MinusOnes = makeReg(Ty, RegNum);
3534 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
3535 Context.insert(InstFakeDef::create(Func, MinusOnes));
3536 _pcmpeq(MinusOnes, MinusOnes);
3537 return MinusOnes;
3538 }
3539
3412 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { 3540 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
3413 // There is no support for loading or emitting vector constants, so
3414 // this value is initialized using register operations.
3415 Variable *Dest = makeVectorOfZeros(Ty, RegNum); 3541 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
3416 Variable *MinusOne = makeReg(Ty); 3542 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3417 // Insert a FakeDef so the live range of MinusOne is not overestimated.
3418 Context.insert(InstFakeDef::create(Func, MinusOne));
3419 _pcmpeq(MinusOne, MinusOne);
3420 _psub(Dest, MinusOne); 3543 _psub(Dest, MinusOne);
3421 return Dest; 3544 return Dest;
3422 } 3545 }
3423 3546
3547 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
3548 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
3549 Ty == IceType_v16i8);
3550 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
3551 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
3552 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
3553 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));
3554 return Reg;
3555 } else {
3556 // SSE has no left shift operation for vectors of 8 bit integers.
3557 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
3558 Constant *ConstantMask =
3559 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);
3560 Variable *Reg = makeReg(Ty, RegNum);
3561 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
3562 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
3563 return Reg;
3564 }
3565 }
3566
3424 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, 3567 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
3425 Variable *Slot, 3568 Variable *Slot,
3426 uint32_t Offset) { 3569 uint32_t Offset) {
3427 // Ensure that Slot is a stack slot. 3570 // Ensure that Slot is a stack slot.
3428 assert(Slot->getWeight() == RegWeight::Zero); 3571 assert(Slot->getWeight() == RegWeight::Zero);
3429 assert(Slot->getRegNum() == Variable::NoRegister); 3572 assert(Slot->getRegNum() == Variable::NoRegister);
3430 // Compute the location of Slot in memory. 3573 // Compute the location of Slot in memory.
3431 // TODO(wala,stichnot): lea should not be required. The address of 3574 // TODO(wala,stichnot): lea should not be required. The address of
3432 // the stack slot is known at compile time (although not until after 3575 // the stack slot is known at compile time (although not until after
3433 // addProlog()). 3576 // addProlog()).
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after
3752 for (SizeT i = 0; i < Size; ++i) { 3895 for (SizeT i = 0; i < Size; ++i) {
3753 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 3896 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
3754 } 3897 }
3755 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 3898 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
3756 } 3899 }
3757 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 3900 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
3758 << "\n"; 3901 << "\n";
3759 } 3902 }
3760 3903
3761 } // end of namespace Ice 3904 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698