OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 998 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1009 if (const auto *ConstantTotalSize = | 1009 if (const auto *ConstantTotalSize = |
1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
1011 const uint32_t Value = | 1011 const uint32_t Value = |
1012 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); | 1012 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); |
1013 if (!UseFramePointer) { | 1013 if (!UseFramePointer) { |
1014 // If we don't need a Frame Pointer, this alloca has a known offset to the | 1014 // If we don't need a Frame Pointer, this alloca has a known offset to the |
1015 // stack pointer. We don't need adjust the stack pointer, nor assign any | 1015 // stack pointer. We don't need adjust the stack pointer, nor assign any |
1016 // value to Dest, as Dest is rematerializable. | 1016 // value to Dest, as Dest is rematerializable. |
1017 assert(Dest->isRematerializable()); | 1017 assert(Dest->isRematerializable()); |
1018 FixedAllocaSizeBytes += Value; | 1018 FixedAllocaSizeBytes += Value; |
1019 Context.insert(InstFakeDef::create(Func, Dest)); | 1019 Context.insert<InstFakeDef>(Dest); |
1020 } else { | 1020 } else { |
1021 _sub(esp, Ctx->getConstantInt32(Value)); | 1021 _sub(esp, Ctx->getConstantInt32(Value)); |
1022 } | 1022 } |
1023 } else { | 1023 } else { |
1024 // Non-constant sizes need to be adjusted to the next highest multiple of | 1024 // Non-constant sizes need to be adjusted to the next highest multiple of |
1025 // the required alignment at runtime. | 1025 // the required alignment at runtime. |
1026 Variable *T = makeReg(IceType_i32); | 1026 Variable *T = makeReg(IceType_i32); |
1027 _mov(T, TotalSize); | 1027 _mov(T, TotalSize); |
1028 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1028 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
1029 _and(T, Ctx->getConstantInt32(-Alignment)); | 1029 _and(T, Ctx->getConstantInt32(-Alignment)); |
(...skipping 321 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1351 Context.insert(Label); | 1351 Context.insert(Label); |
1352 _mov(DestLo, T_2); | 1352 _mov(DestLo, T_2); |
1353 _mov(DestHi, T_3); | 1353 _mov(DestHi, T_3); |
1354 } | 1354 } |
1355 } | 1355 } |
1356 | 1356 |
1357 template <class Machine> | 1357 template <class Machine> |
1358 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 1358 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
1359 Variable *Dest = Inst->getDest(); | 1359 Variable *Dest = Inst->getDest(); |
1360 if (Dest->isRematerializable()) { | 1360 if (Dest->isRematerializable()) { |
1361 Context.insert(InstFakeDef::create(Func, Dest)); | 1361 Context.insert<InstFakeDef>(Dest); |
1362 return; | 1362 return; |
1363 } | 1363 } |
1364 Type Ty = Dest->getType(); | 1364 Type Ty = Dest->getType(); |
1365 Operand *Src0 = legalize(Inst->getSrc(0)); | 1365 Operand *Src0 = legalize(Inst->getSrc(0)); |
1366 Operand *Src1 = legalize(Inst->getSrc(1)); | 1366 Operand *Src1 = legalize(Inst->getSrc(1)); |
1367 if (Inst->isCommutative()) { | 1367 if (Inst->isCommutative()) { |
1368 uint32_t SwapCount = 0; | 1368 uint32_t SwapCount = 0; |
1369 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) { | 1369 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) { |
1370 std::swap(Src0, Src1); | 1370 std::swap(Src0, Src1); |
1371 ++SwapCount; | 1371 ++SwapCount; |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1469 // The mul instruction cannot take an immediate operand. | 1469 // The mul instruction cannot take an immediate operand. |
1470 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); | 1470 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); |
1471 _mov(T_1, Src0Hi); | 1471 _mov(T_1, Src0Hi); |
1472 _imul(T_1, Src1Lo); | 1472 _imul(T_1, Src1Lo); |
1473 _mov(T_2, Src1Hi); | 1473 _mov(T_2, Src1Hi); |
1474 _imul(T_2, Src0Lo); | 1474 _imul(T_2, Src0Lo); |
1475 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); | 1475 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); |
1476 _mul(T_4Lo, T_3, Src1Lo); | 1476 _mul(T_4Lo, T_3, Src1Lo); |
1477 // The mul instruction produces two dest variables, edx:eax. We create a | 1477 // The mul instruction produces two dest variables, edx:eax. We create a |
1478 // fake definition of edx to account for this. | 1478 // fake definition of edx to account for this. |
1479 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | 1479 Context.insert<InstFakeDef>(T_4Hi, T_4Lo); |
1480 _mov(DestLo, T_4Lo); | 1480 _mov(DestLo, T_4Lo); |
1481 _add(T_4Hi, T_1); | 1481 _add(T_4Hi, T_1); |
1482 _add(T_4Hi, T_2); | 1482 _add(T_4Hi, T_2); |
1483 _mov(DestHi, T_4Hi); | 1483 _mov(DestHi, T_4Hi); |
1484 } break; | 1484 } break; |
1485 case InstArithmetic::Shl: | 1485 case InstArithmetic::Shl: |
1486 case InstArithmetic::Lshr: | 1486 case InstArithmetic::Lshr: |
1487 case InstArithmetic::Ashr: | 1487 case InstArithmetic::Ashr: |
1488 lowerShift64(Inst->getOp(), Src0Lo, Src0Hi, Src1Lo, DestLo, DestHi); | 1488 lowerShift64(Inst->getOp(), Src0Lo, Src0Hi, Src1Lo, DestLo, DestHi); |
1489 break; | 1489 break; |
(...skipping 414 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1904 case InstArithmetic::Frem: | 1904 case InstArithmetic::Frem: |
1905 llvm::report_fatal_error("Helper call was expected"); | 1905 llvm::report_fatal_error("Helper call was expected"); |
1906 break; | 1906 break; |
1907 } | 1907 } |
1908 } | 1908 } |
1909 | 1909 |
1910 template <class Machine> | 1910 template <class Machine> |
1911 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | 1911 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { |
1912 Variable *Dest = Inst->getDest(); | 1912 Variable *Dest = Inst->getDest(); |
1913 if (Dest->isRematerializable()) { | 1913 if (Dest->isRematerializable()) { |
1914 Context.insert(InstFakeDef::create(Func, Dest)); | 1914 Context.insert<InstFakeDef>(Dest); |
1915 return; | 1915 return; |
1916 } | 1916 } |
1917 Operand *Src = Inst->getSrc(0); | 1917 Operand *Src = Inst->getSrc(0); |
1918 assert(Dest->getType() == Src->getType()); | 1918 assert(Dest->getType() == Src->getType()); |
1919 lowerMove(Dest, Src, false); | 1919 lowerMove(Dest, Src, false); |
1920 } | 1920 } |
1921 | 1921 |
1922 template <class Machine> | 1922 template <class Machine> |
1923 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) { | 1923 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) { |
1924 if (Br->isUnconditional()) { | 1924 if (Br->isUnconditional()) { |
(...skipping 445 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2370 | 2370 |
2371 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 2371 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
2372 auto *SpillLo = Traits::VariableSplit::create( | 2372 auto *SpillLo = Traits::VariableSplit::create( |
2373 Func, Spill, Traits::VariableSplit::Low); | 2373 Func, Spill, Traits::VariableSplit::Low); |
2374 auto *SpillHi = Traits::VariableSplit::create( | 2374 auto *SpillHi = Traits::VariableSplit::create( |
2375 Func, Spill, Traits::VariableSplit::High); | 2375 Func, Spill, Traits::VariableSplit::High); |
2376 _mov(T_Lo, loOperand(Src0)); | 2376 _mov(T_Lo, loOperand(Src0)); |
2377 // Technically, the Spill is defined after the _store happens, but | 2377 // Technically, the Spill is defined after the _store happens, but |
2378 // SpillLo is considered a "use" of Spill so define Spill before it is | 2378 // SpillLo is considered a "use" of Spill so define Spill before it is |
2379 // used. | 2379 // used. |
2380 Context.insert(InstFakeDef::create(Func, Spill)); | 2380 Context.insert<InstFakeDef>(Spill); |
2381 _store(T_Lo, SpillLo); | 2381 _store(T_Lo, SpillLo); |
2382 _mov(T_Hi, hiOperand(Src0)); | 2382 _mov(T_Hi, hiOperand(Src0)); |
2383 _store(T_Hi, SpillHi); | 2383 _store(T_Hi, SpillHi); |
2384 _movq(Dest, Spill); | 2384 _movq(Dest, Spill); |
2385 } | 2385 } |
2386 } break; | 2386 } break; |
2387 case IceType_v8i1: { | 2387 case IceType_v8i1: { |
2388 llvm::report_fatal_error("Helper call was expected"); | 2388 llvm::report_fatal_error("Helper call was expected"); |
2389 } break; | 2389 } break; |
2390 case IceType_v16i1: { | 2390 case IceType_v16i1: { |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2443 } | 2443 } |
2444 | 2444 |
2445 if (InVectorElementTy == IceType_i32) { | 2445 if (InVectorElementTy == IceType_i32) { |
2446 _movd(ExtractedElementR, T); | 2446 _movd(ExtractedElementR, T); |
2447 } else { // Ty == IceType_f32 | 2447 } else { // Ty == IceType_f32 |
2448 // TODO(wala): _movss is only used here because _mov does not allow a | 2448 // TODO(wala): _movss is only used here because _mov does not allow a |
2449 // vector source and a scalar destination. _mov should be able to be | 2449 // vector source and a scalar destination. _mov should be able to be |
2450 // used here. | 2450 // used here. |
2451 // _movss is a binary instruction, so the FakeDef is needed to keep the | 2451 // _movss is a binary instruction, so the FakeDef is needed to keep the |
2452 // live range analysis consistent. | 2452 // live range analysis consistent. |
2453 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); | 2453 Context.insert<InstFakeDef>(ExtractedElementR); |
2454 _movss(ExtractedElementR, T); | 2454 _movss(ExtractedElementR, T); |
2455 } | 2455 } |
2456 } else { | 2456 } else { |
2457 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 2457 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
2458 // Spill the value to a stack slot and do the extraction in memory. | 2458 // Spill the value to a stack slot and do the extraction in memory. |
2459 // | 2459 // |
2460 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support | 2460 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
2461 // for legalizing to mem is implemented. | 2461 // for legalizing to mem is implemented. |
2462 Variable *Slot = Func->makeVariable(Ty); | 2462 Variable *Slot = Func->makeVariable(Ty); |
2463 Slot->setMustNotHaveReg(); | 2463 Slot->setMustNotHaveReg(); |
(...skipping 415 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2879 switch (Condition) { | 2879 switch (Condition) { |
2880 default: | 2880 default: |
2881 llvm_unreachable("unexpected condition"); | 2881 llvm_unreachable("unexpected condition"); |
2882 break; | 2882 break; |
2883 case InstIcmp::Eq: | 2883 case InstIcmp::Eq: |
2884 case InstIcmp::Ule: | 2884 case InstIcmp::Ule: |
2885 // Mov Src0HiRM first, because it was legalized most recently, and will | 2885 // Mov Src0HiRM first, because it was legalized most recently, and will |
2886 // sometimes avoid a move before the OR. | 2886 // sometimes avoid a move before the OR. |
2887 _mov(Temp, Src0HiRM); | 2887 _mov(Temp, Src0HiRM); |
2888 _or(Temp, Src0LoRM); | 2888 _or(Temp, Src0LoRM); |
2889 Context.insert(InstFakeUse::create(Func, Temp)); | 2889 Context.insert<InstFakeUse>(Temp); |
2890 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer); | 2890 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer); |
2891 return; | 2891 return; |
2892 case InstIcmp::Ne: | 2892 case InstIcmp::Ne: |
2893 case InstIcmp::Ugt: | 2893 case InstIcmp::Ugt: |
2894 // Mov Src0HiRM first, because it was legalized most recently, and will | 2894 // Mov Src0HiRM first, because it was legalized most recently, and will |
2895 // sometimes avoid a move before the OR. | 2895 // sometimes avoid a move before the OR. |
2896 _mov(Temp, Src0HiRM); | 2896 _mov(Temp, Src0HiRM); |
2897 _or(Temp, Src0LoRM); | 2897 _or(Temp, Src0LoRM); |
2898 Context.insert(InstFakeUse::create(Func, Temp)); | 2898 Context.insert<InstFakeUse>(Temp); |
2899 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer); | 2899 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer); |
2900 return; | 2900 return; |
2901 case InstIcmp::Uge: | 2901 case InstIcmp::Uge: |
2902 movOrConsumer(true, Dest, Consumer); | 2902 movOrConsumer(true, Dest, Consumer); |
2903 return; | 2903 return; |
2904 case InstIcmp::Ult: | 2904 case InstIcmp::Ult: |
2905 movOrConsumer(false, Dest, Consumer); | 2905 movOrConsumer(false, Dest, Consumer); |
2906 return; | 2906 return; |
2907 case InstIcmp::Sgt: | 2907 case InstIcmp::Sgt: |
2908 break; | 2908 break; |
(...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3053 case InstArithmetic::Or: | 3053 case InstArithmetic::Or: |
3054 _mov(T, Src0); | 3054 _mov(T, Src0); |
3055 _or(T, Src1); | 3055 _or(T, Src1); |
3056 break; | 3056 break; |
3057 } | 3057 } |
3058 | 3058 |
3059 if (Consumer == nullptr) { | 3059 if (Consumer == nullptr) { |
3060 llvm::report_fatal_error("Expected a consumer instruction"); | 3060 llvm::report_fatal_error("Expected a consumer instruction"); |
3061 } | 3061 } |
3062 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 3062 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
3063 Context.insert(InstFakeUse::create(Func, T)); | 3063 Context.insert<InstFakeUse>(T); |
3064 Context.insert(InstFakeDef::create(Func, Dest)); | 3064 Context.insert<InstFakeDef>(Dest); |
3065 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 3065 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
3066 return; | 3066 return; |
3067 } | 3067 } |
3068 llvm::report_fatal_error("Unexpected consumer type"); | 3068 llvm::report_fatal_error("Unexpected consumer type"); |
3069 } | 3069 } |
3070 | 3070 |
3071 template <class Machine> | 3071 template <class Machine> |
3072 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | 3072 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
3073 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 3073 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
3074 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 3074 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
(...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3283 // anyway, since this is x86-32 and integer arithmetic only happens on | 3283 // anyway, since this is x86-32 and integer arithmetic only happens on |
3284 // 32-bit quantities. | 3284 // 32-bit quantities. |
3285 Variable *T = makeReg(IceType_f64); | 3285 Variable *T = makeReg(IceType_f64); |
3286 typename Traits::X86OperandMem *Addr = | 3286 typename Traits::X86OperandMem *Addr = |
3287 formMemoryOperand(Instr->getArg(0), IceType_f64); | 3287 formMemoryOperand(Instr->getArg(0), IceType_f64); |
3288 _movq(T, Addr); | 3288 _movq(T, Addr); |
3289 // Then cast the bits back out of the XMM register to the i64 Dest. | 3289 // Then cast the bits back out of the XMM register to the i64 Dest. |
3290 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 3290 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
3291 lowerCast(Cast); | 3291 lowerCast(Cast); |
3292 // Make sure that the atomic load isn't elided when unused. | 3292 // Make sure that the atomic load isn't elided when unused. |
3293 Context.insert(InstFakeUse::create(Func, Dest64On32->getLo())); | 3293 Context.insert<InstFakeUse>(Dest64On32->getLo()); |
3294 Context.insert(InstFakeUse::create(Func, Dest64On32->getHi())); | 3294 Context.insert<InstFakeUse>(Dest64On32->getHi()); |
3295 return; | 3295 return; |
3296 } | 3296 } |
3297 } | 3297 } |
3298 auto *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | 3298 auto *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); |
3299 lowerLoad(Load); | 3299 lowerLoad(Load); |
3300 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 3300 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
3301 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert | 3301 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert |
3302 // the FakeUse on the last-inserted instruction's dest. | 3302 // the FakeUse on the last-inserted instruction's dest. |
3303 Context.insert( | 3303 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest()); |
3304 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | |
3305 return; | 3304 return; |
3306 } | 3305 } |
3307 case Intrinsics::AtomicRMW: | 3306 case Intrinsics::AtomicRMW: |
3308 if (!Intrinsics::isMemoryOrderValid( | 3307 if (!Intrinsics::isMemoryOrderValid( |
3309 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 3308 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
3310 Func->setError("Unexpected memory ordering for AtomicRMW"); | 3309 Func->setError("Unexpected memory ordering for AtomicRMW"); |
3311 return; | 3310 return; |
3312 } | 3311 } |
3313 lowerAtomicRMW( | 3312 lowerAtomicRMW( |
3314 Instr->getDest(), | 3313 Instr->getDest(), |
(...skipping 518 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3833 } | 3832 } |
3834 constexpr bool Locked = true; | 3833 constexpr bool Locked = true; |
3835 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3834 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
3836 _br(Traits::Cond::Br_ne, Label); | 3835 _br(Traits::Cond::Br_ne, Label); |
3837 if (!IsXchg8b) { | 3836 if (!IsXchg8b) { |
3838 // If Val is a variable, model the extended live range of Val through | 3837 // If Val is a variable, model the extended live range of Val through |
3839 // the end of the loop, since it will be re-used by the loop. | 3838 // the end of the loop, since it will be re-used by the loop. |
3840 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3839 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { |
3841 auto *ValLo = llvm::cast<Variable>(loOperand(ValVar)); | 3840 auto *ValLo = llvm::cast<Variable>(loOperand(ValVar)); |
3842 auto *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); | 3841 auto *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); |
3843 Context.insert(InstFakeUse::create(Func, ValLo)); | 3842 Context.insert<InstFakeUse>(ValLo); |
3844 Context.insert(InstFakeUse::create(Func, ValHi)); | 3843 Context.insert<InstFakeUse>(ValHi); |
3845 } | 3844 } |
3846 } else { | 3845 } else { |
3847 // For xchg, the loop is slightly smaller and ebx/ecx are used. | 3846 // For xchg, the loop is slightly smaller and ebx/ecx are used. |
3848 Context.insert(InstFakeUse::create(Func, T_ebx)); | 3847 Context.insert<InstFakeUse>(T_ebx); |
3849 Context.insert(InstFakeUse::create(Func, T_ecx)); | 3848 Context.insert<InstFakeUse>(T_ecx); |
3850 } | 3849 } |
3851 // The address base (if any) is also reused in the loop. | 3850 // The address base (if any) is also reused in the loop. |
3852 if (Variable *Base = Addr->getBase()) | 3851 if (Variable *Base = Addr->getBase()) |
3853 Context.insert(InstFakeUse::create(Func, Base)); | 3852 Context.insert<InstFakeUse>(Base); |
3854 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3853 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3855 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3854 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3856 _mov(DestLo, T_eax); | 3855 _mov(DestLo, T_eax); |
3857 _mov(DestHi, T_edx); | 3856 _mov(DestHi, T_edx); |
3858 return; | 3857 return; |
3859 } | 3858 } |
3860 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3859 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3861 int32_t Eax; | 3860 int32_t Eax; |
3862 switch (Ty) { | 3861 switch (Ty) { |
3863 default: | 3862 default: |
3864 llvm_unreachable("Bad type for atomicRMW"); | 3863 llvm_unreachable("Bad type for atomicRMW"); |
3865 // fallthrough | 3864 // fallthrough |
3866 case IceType_i32: | 3865 case IceType_i32: |
3867 Eax = Traits::RegisterSet::Reg_eax; | 3866 Eax = Traits::RegisterSet::Reg_eax; |
3868 break; | 3867 break; |
3869 case IceType_i16: | 3868 case IceType_i16: |
3870 Eax = Traits::RegisterSet::Reg_ax; | 3869 Eax = Traits::RegisterSet::Reg_ax; |
3871 break; | 3870 break; |
3872 case IceType_i8: | 3871 case IceType_i8: |
3873 Eax = Traits::RegisterSet::Reg_al; | 3872 Eax = Traits::RegisterSet::Reg_al; |
3874 break; | 3873 break; |
3875 } | 3874 } |
3876 Variable *T_eax = makeReg(Ty, Eax); | 3875 Variable *T_eax = makeReg(Ty, Eax); |
3877 _mov(T_eax, Addr); | 3876 _mov(T_eax, Addr); |
3878 typename Traits::Insts::Label *Label = | 3877 auto *Label = Context.insert<typename Traits::Insts::Label>(this); |
3879 Traits::Insts::Label::create(Func, this); | |
3880 Context.insert(Label); | |
3881 // We want to pick a different register for T than Eax, so don't use | 3878 // We want to pick a different register for T than Eax, so don't use |
3882 // _mov(T == nullptr, T_eax). | 3879 // _mov(T == nullptr, T_eax). |
3883 Variable *T = makeReg(Ty); | 3880 Variable *T = makeReg(Ty); |
3884 _mov(T, T_eax); | 3881 _mov(T, T_eax); |
3885 (this->*Op_Lo)(T, Val); | 3882 (this->*Op_Lo)(T, Val); |
3886 constexpr bool Locked = true; | 3883 constexpr bool Locked = true; |
3887 _cmpxchg(Addr, T_eax, T, Locked); | 3884 _cmpxchg(Addr, T_eax, T, Locked); |
3888 _br(Traits::Cond::Br_ne, Label); | 3885 _br(Traits::Cond::Br_ne, Label); |
3889 // If Val is a variable, model the extended live range of Val through | 3886 // If Val is a variable, model the extended live range of Val through |
3890 // the end of the loop, since it will be re-used by the loop. | 3887 // the end of the loop, since it will be re-used by the loop. |
3891 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3888 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { |
3892 Context.insert(InstFakeUse::create(Func, ValVar)); | 3889 Context.insert<InstFakeUse>(ValVar); |
3893 } | 3890 } |
3894 // The address base (if any) is also reused in the loop. | 3891 // The address base (if any) is also reused in the loop. |
3895 if (Variable *Base = Addr->getBase()) | 3892 if (Variable *Base = Addr->getBase()) |
3896 Context.insert(InstFakeUse::create(Func, Base)); | 3893 Context.insert<InstFakeUse>(Base); |
3897 _mov(Dest, T_eax); | 3894 _mov(Dest, T_eax); |
3898 } | 3895 } |
3899 | 3896 |
3900 /// Lowers count {trailing, leading} zeros intrinsic. | 3897 /// Lowers count {trailing, leading} zeros intrinsic. |
3901 /// | 3898 /// |
3902 /// We could do constant folding here, but that should have | 3899 /// We could do constant folding here, but that should have |
3903 /// been done by the front-end/middle-end optimizations. | 3900 /// been done by the front-end/middle-end optimizations. |
3904 template <class Machine> | 3901 template <class Machine> |
3905 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | 3902 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, |
3906 Operand *FirstVal, | 3903 Operand *FirstVal, |
(...skipping 746 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4653 // something else. We only care if it is Variable. | 4650 // something else. We only care if it is Variable. |
4654 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); | 4651 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); |
4655 if (Var == nullptr) | 4652 if (Var == nullptr) |
4656 return; | 4653 return; |
4657 // We use lowerStore() to copy out-args onto the stack. This creates a memory | 4654 // We use lowerStore() to copy out-args onto the stack. This creates a memory |
4658 // operand with the stack pointer as the base register. Don't do bounds | 4655 // operand with the stack pointer as the base register. Don't do bounds |
4659 // checks on that. | 4656 // checks on that. |
4660 if (Var->getRegNum() == Traits::RegisterSet::Reg_esp) | 4657 if (Var->getRegNum() == Traits::RegisterSet::Reg_esp) |
4661 return; | 4658 return; |
4662 | 4659 |
4663 typename Traits::Insts::Label *Label = | 4660 auto *Label = Traits::Insts::Label::create(Func, this); |
4664 Traits::Insts::Label::create(Func, this); | |
4665 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); | 4661 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); |
4666 _br(Traits::Cond::Br_e, Label); | 4662 _br(Traits::Cond::Br_e, Label); |
4667 _cmp(Opnd, Ctx->getConstantInt32(1)); | 4663 _cmp(Opnd, Ctx->getConstantInt32(1)); |
4668 _br(Traits::Cond::Br_e, Label); | 4664 _br(Traits::Cond::Br_e, Label); |
4669 Context.insert(Label); | 4665 Context.insert(Label); |
4670 } | 4666 } |
4671 | 4667 |
4672 template <class Machine> | 4668 template <class Machine> |
4673 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { | 4669 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { |
4674 // A Load instruction can be treated the same as an Assign instruction, after | 4670 // A Load instruction can be treated the same as an Assign instruction, after |
(...skipping 29 matching lines...) Expand all Loading... |
4704 Constant *OffsetOp = nullptr; | 4700 Constant *OffsetOp = nullptr; |
4705 if (Relocatable == nullptr) { | 4701 if (Relocatable == nullptr) { |
4706 OffsetOp = Ctx->getConstantInt32(Offset); | 4702 OffsetOp = Ctx->getConstantInt32(Offset); |
4707 } else { | 4703 } else { |
4708 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, | 4704 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
4709 Relocatable->getName(), | 4705 Relocatable->getName(), |
4710 Relocatable->getSuppressMangling()); | 4706 Relocatable->getSuppressMangling()); |
4711 } | 4707 } |
4712 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, | 4708 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, |
4713 Index, Shift, SegmentReg); | 4709 Index, Shift, SegmentReg); |
4714 Context.insert(InstLoad::create(Func, Dest, Addr)); | 4710 Context.insert<InstLoad>(Dest, Addr); |
4715 } | 4711 } |
4716 } | 4712 } |
4717 | 4713 |
4718 template <class Machine> | 4714 template <class Machine> |
4719 void TargetX86Base<Machine>::randomlyInsertNop(float Probability, | 4715 void TargetX86Base<Machine>::randomlyInsertNop(float Probability, |
4720 RandomNumberGenerator &RNG) { | 4716 RandomNumberGenerator &RNG) { |
4721 RandomNumberGeneratorWrapper RNGW(RNG); | 4717 RandomNumberGeneratorWrapper RNGW(RNG); |
4722 if (RNGW.getTrueWithProbability(Probability)) { | 4718 if (RNGW.getTrueWithProbability(Probability)) { |
4723 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); | 4719 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); |
4724 } | 4720 } |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4768 | 4764 |
4769 template <class Machine> | 4765 template <class Machine> |
4770 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest, | 4766 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest, |
4771 typename Traits::Cond::BrCond Cond, | 4767 typename Traits::Cond::BrCond Cond, |
4772 Operand *SrcT, Operand *SrcF) { | 4768 Operand *SrcT, Operand *SrcF) { |
4773 Type DestTy = Dest->getType(); | 4769 Type DestTy = Dest->getType(); |
4774 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { | 4770 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { |
4775 // The cmov instruction doesn't allow 8-bit or FP operands, so we need | 4771 // The cmov instruction doesn't allow 8-bit or FP operands, so we need |
4776 // explicit control flow. | 4772 // explicit control flow. |
4777 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: | 4773 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
4778 typename Traits::Insts::Label *Label = | 4774 auto *Label = Traits::Insts::Label::create(Func, this); |
4779 Traits::Insts::Label::create(Func, this); | |
4780 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); | 4775 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); |
4781 _mov(Dest, SrcT); | 4776 _mov(Dest, SrcT); |
4782 _br(Cond, Label); | 4777 _br(Cond, Label); |
4783 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); | 4778 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); |
4784 _redefined(_mov(Dest, SrcF)); | 4779 _redefined(_mov(Dest, SrcF)); |
4785 Context.insert(Label); | 4780 Context.insert(Label); |
4786 return; | 4781 return; |
4787 } | 4782 } |
4788 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | 4783 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t |
4789 // But if SrcT is immediate, we might be able to do better, as the cmov | 4784 // But if SrcT is immediate, we might be able to do better, as the cmov |
(...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5011 Constant *OffsetOp = nullptr; | 5006 Constant *OffsetOp = nullptr; |
5012 if (Relocatable == nullptr) { | 5007 if (Relocatable == nullptr) { |
5013 OffsetOp = Ctx->getConstantInt32(Offset); | 5008 OffsetOp = Ctx->getConstantInt32(Offset); |
5014 } else { | 5009 } else { |
5015 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, | 5010 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
5016 Relocatable->getName(), | 5011 Relocatable->getName(), |
5017 Relocatable->getSuppressMangling()); | 5012 Relocatable->getSuppressMangling()); |
5018 } | 5013 } |
5019 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, | 5014 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, |
5020 Index, Shift, SegmentReg); | 5015 Index, Shift, SegmentReg); |
5021 auto *NewStore = InstStore::create(Func, Data, Addr); | 5016 auto *NewStore = Context.insert<InstStore>(Data, Addr); |
5022 if (Inst->getDest()) | 5017 if (Inst->getDest()) |
5023 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | 5018 NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
5024 Context.insert(NewStore); | |
5025 } | 5019 } |
5026 } | 5020 } |
5027 | 5021 |
5028 template <class Machine> | 5022 template <class Machine> |
5029 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, | 5023 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, |
5030 uint64_t Min, uint64_t Max) { | 5024 uint64_t Min, uint64_t Max) { |
5031 // TODO(ascull): 64-bit should not reach here but only because it is not | 5025 // TODO(ascull): 64-bit should not reach here but only because it is not |
5032 // implemented yet. This should be able to handle the 64-bit case. | 5026 // implemented yet. This should be able to handle the 64-bit case. |
5033 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); | 5027 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); |
5034 // Subtracting 0 is a nop so don't do it | 5028 // Subtracting 0 is a nop so don't do it |
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5266 Type Ty = Dest->getType(); | 5260 Type Ty = Dest->getType(); |
5267 Type ElementTy = typeElementType(Ty); | 5261 Type ElementTy = typeElementType(Ty); |
5268 SizeT NumElements = typeNumElements(Ty); | 5262 SizeT NumElements = typeNumElements(Ty); |
5269 | 5263 |
5270 Operand *T = Ctx->getConstantUndef(Ty); | 5264 Operand *T = Ctx->getConstantUndef(Ty); |
5271 for (SizeT I = 0; I < NumElements; ++I) { | 5265 for (SizeT I = 0; I < NumElements; ++I) { |
5272 Constant *Index = Ctx->getConstantInt32(I); | 5266 Constant *Index = Ctx->getConstantInt32(I); |
5273 | 5267 |
5274 // Extract the next two inputs. | 5268 // Extract the next two inputs. |
5275 Variable *Op0 = Func->makeVariable(ElementTy); | 5269 Variable *Op0 = Func->makeVariable(ElementTy); |
5276 Context.insert(InstExtractElement::create(Func, Op0, Src0, Index)); | 5270 Context.insert<InstExtractElement>(Op0, Src0, Index); |
5277 Variable *Op1 = Func->makeVariable(ElementTy); | 5271 Variable *Op1 = Func->makeVariable(ElementTy); |
5278 Context.insert(InstExtractElement::create(Func, Op1, Src1, Index)); | 5272 Context.insert<InstExtractElement>(Op1, Src1, Index); |
5279 | 5273 |
5280 // Perform the arithmetic as a scalar operation. | 5274 // Perform the arithmetic as a scalar operation. |
5281 Variable *Res = Func->makeVariable(ElementTy); | 5275 Variable *Res = Func->makeVariable(ElementTy); |
5282 auto *Arith = InstArithmetic::create(Func, Kind, Res, Op0, Op1); | 5276 auto *Arith = Context.insert<InstArithmetic>(Kind, Res, Op0, Op1); |
5283 Context.insert(Arith); | |
5284 // We might have created an operation that needed a helper call. | 5277 // We might have created an operation that needed a helper call. |
5285 genTargetHelperCallFor(Arith); | 5278 genTargetHelperCallFor(Arith); |
5286 | 5279 |
5287 // Insert the result into position. | 5280 // Insert the result into position. |
5288 Variable *DestT = Func->makeVariable(Ty); | 5281 Variable *DestT = Func->makeVariable(Ty); |
5289 Context.insert(InstInsertElement::create(Func, DestT, T, Res, Index)); | 5282 Context.insert<InstInsertElement>(DestT, T, Res, Index); |
5290 T = DestT; | 5283 T = DestT; |
5291 } | 5284 } |
5292 | 5285 |
5293 Context.insert(InstAssign::create(Func, Dest, T)); | 5286 Context.insert<InstAssign>(Dest, T); |
5294 } | 5287 } |
5295 | 5288 |
5296 /// The following pattern occurs often in lowered C and C++ code: | 5289 /// The following pattern occurs often in lowered C and C++ code: |
5297 /// | 5290 /// |
5298 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 5291 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
5299 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> | 5292 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
5300 /// | 5293 /// |
5301 /// We can eliminate the sext operation by copying the result of pcmpeqd, | 5294 /// We can eliminate the sext operation by copying the result of pcmpeqd, |
5302 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the | 5295 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the |
5303 /// sext operation. | 5296 /// sext operation. |
(...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5574 break; | 5567 break; |
5575 case IceType_i16: | 5568 case IceType_i16: |
5576 assert(Src0->getType() == IceType_v16i1); | 5569 assert(Src0->getType() == IceType_v16i1); |
5577 HelperName = H_bitcast_16xi1_i16; | 5570 HelperName = H_bitcast_16xi1_i16; |
5578 break; | 5571 break; |
5579 case IceType_v8i1: { | 5572 case IceType_v8i1: { |
5580 assert(Src0->getType() == IceType_i8); | 5573 assert(Src0->getType() == IceType_i8); |
5581 HelperName = H_bitcast_i8_8xi1; | 5574 HelperName = H_bitcast_i8_8xi1; |
5582 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 5575 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
5583 // Arguments to functions are required to be at least 32 bits wide. | 5576 // Arguments to functions are required to be at least 32 bits wide. |
5584 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | 5577 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); |
5585 Src0 = Src0AsI32; | 5578 Src0 = Src0AsI32; |
5586 } break; | 5579 } break; |
5587 case IceType_v16i1: { | 5580 case IceType_v16i1: { |
5588 assert(Src0->getType() == IceType_i16); | 5581 assert(Src0->getType() == IceType_i16); |
5589 HelperName = H_bitcast_i16_16xi1; | 5582 HelperName = H_bitcast_i16_16xi1; |
5590 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 5583 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
5591 // Arguments to functions are required to be at least 32 bits wide. | 5584 // Arguments to functions are required to be at least 32 bits wide. |
5592 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | 5585 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); |
5593 Src0 = Src0AsI32; | 5586 Src0 = Src0AsI32; |
5594 } break; | 5587 } break; |
5595 } | 5588 } |
5596 } break; | 5589 } break; |
5597 } | 5590 } |
5598 constexpr SizeT MaxSrcs = 1; | 5591 constexpr SizeT MaxSrcs = 1; |
5599 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | 5592 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); |
5600 Call->addArg(Src0); | 5593 Call->addArg(Src0); |
5601 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | 5594 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); |
5602 Context.insert(Call); | 5595 Context.insert(Call); |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5717 case IceType_i1: | 5710 case IceType_i1: |
5718 case IceType_i8: | 5711 case IceType_i8: |
5719 case IceType_i16: | 5712 case IceType_i16: |
5720 case IceType_i32: | 5713 case IceType_i32: |
5721 case IceType_i64: | 5714 case IceType_i64: |
5722 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. | 5715 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. |
5723 _mov(Reg, Ctx->getConstantZero(Ty)); | 5716 _mov(Reg, Ctx->getConstantZero(Ty)); |
5724 break; | 5717 break; |
5725 case IceType_f32: | 5718 case IceType_f32: |
5726 case IceType_f64: | 5719 case IceType_f64: |
5727 Context.insert(InstFakeDef::create(Func, Reg)); | 5720 Context.insert<InstFakeDef>(Reg); |
5728 _xorps(Reg, Reg); | 5721 _xorps(Reg, Reg); |
5729 break; | 5722 break; |
5730 default: | 5723 default: |
5731 // All vector types use the same pxor instruction. | 5724 // All vector types use the same pxor instruction. |
5732 assert(isVectorType(Ty)); | 5725 assert(isVectorType(Ty)); |
5733 Context.insert(InstFakeDef::create(Func, Reg)); | 5726 Context.insert<InstFakeDef>(Reg); |
5734 _pxor(Reg, Reg); | 5727 _pxor(Reg, Reg); |
5735 break; | 5728 break; |
5736 } | 5729 } |
5737 return Reg; | 5730 return Reg; |
5738 } | 5731 } |
5739 | 5732 |
5740 // There is no support for loading or emitting vector constants, so the vector | 5733 // There is no support for loading or emitting vector constants, so the vector |
5741 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are | 5734 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are |
5742 // initialized with register operations. | 5735 // initialized with register operations. |
5743 // | 5736 // |
5744 // TODO(wala): Add limited support for vector constants so that complex | 5737 // TODO(wala): Add limited support for vector constants so that complex |
5745 // initialization in registers is unnecessary. | 5738 // initialization in registers is unnecessary. |
5746 | 5739 |
/// Returns a register of vector type \p Ty (optionally pinned to \p RegNum)
/// whose lanes are all zero. Simply delegates to makeZeroedRegister, which
/// zeroes vector registers via pxor Reg, Reg rather than loading a constant,
/// since vector constants cannot be loaded or emitted directly (see the
/// comment block above).
template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  return makeZeroedRegister(Ty, RegNum);
}
5751 | 5744 |
5752 template <class Machine> | 5745 template <class Machine> |
5753 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, | 5746 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, |
5754 int32_t RegNum) { | 5747 int32_t RegNum) { |
5755 Variable *MinusOnes = makeReg(Ty, RegNum); | 5748 Variable *MinusOnes = makeReg(Ty, RegNum); |
5756 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 5749 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
5757 Context.insert(InstFakeDef::create(Func, MinusOnes)); | 5750 Context.insert<InstFakeDef>(MinusOnes); |
5758 _pcmpeq(MinusOnes, MinusOnes); | 5751 _pcmpeq(MinusOnes, MinusOnes); |
5759 return MinusOnes; | 5752 return MinusOnes; |
5760 } | 5753 } |
5761 | 5754 |
5762 template <class Machine> | 5755 template <class Machine> |
5763 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 5756 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
5764 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 5757 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
5765 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 5758 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
5766 _psub(Dest, MinusOne); | 5759 _psub(Dest, MinusOne); |
5767 return Dest; | 5760 return Dest; |
(...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6057 template <class Machine> | 6050 template <class Machine> |
6058 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { | 6051 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { |
6059 Type Ty = From->getType(); | 6052 Type Ty = From->getType(); |
6060 if (llvm::isa<ConstantUndef>(From)) { | 6053 if (llvm::isa<ConstantUndef>(From)) { |
6061 // Lower undefs to zero. Another option is to lower undefs to an | 6054 // Lower undefs to zero. Another option is to lower undefs to an |
6062 // uninitialized register; however, using an uninitialized register results | 6055 // uninitialized register; however, using an uninitialized register results |
6063 // in less predictable code. | 6056 // in less predictable code. |
6064 // | 6057 // |
6065 // If in the future the implementation is changed to lower undef values to | 6058 // If in the future the implementation is changed to lower undef values to |
6066 // uninitialized registers, a FakeDef will be needed: | 6059 // uninitialized registers, a FakeDef will be needed: |
6067 // Context.insert(InstFakeDef::create(Func, Reg)); | 6060 // Context.insert<InstFakeDef>(Reg); |
6068 // This is in order to ensure that the live range of Reg is not | 6061 // This is in order to ensure that the live range of Reg is not |
6069 // overestimated. If the constant being lowered is a 64 bit value, then | 6062 // overestimated. If the constant being lowered is a 64 bit value, then |
6070 // the result should be split and the lo and hi components will need to go | 6063 // the result should be split and the lo and hi components will need to go |
6071 // in uninitialized registers. | 6064 // in uninitialized registers. |
6072 if (isVectorType(Ty)) | 6065 if (isVectorType(Ty)) |
6073 return makeVectorOfZeros(Ty, RegNum); | 6066 return makeVectorOfZeros(Ty, RegNum); |
6074 return Ctx->getConstantZero(Ty); | 6067 return Ctx->getConstantZero(Ty); |
6075 } | 6068 } |
6076 return From; | 6069 return From; |
6077 } | 6070 } |
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6410 } | 6403 } |
6411 // the offset is not eligible for blinding or pooling, return the original | 6404 // the offset is not eligible for blinding or pooling, return the original |
6412 // mem operand | 6405 // mem operand |
6413 return MemOperand; | 6406 return MemOperand; |
6414 } | 6407 } |
6415 | 6408 |
6416 } // end of namespace X86Internal | 6409 } // end of namespace X86Internal |
6417 } // end of namespace Ice | 6410 } // end of namespace Ice |
6418 | 6411 |
6419 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6412 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |