| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 998 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1009 if (const auto *ConstantTotalSize = | 1009 if (const auto *ConstantTotalSize = |
| 1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
| 1011 const uint32_t Value = | 1011 const uint32_t Value = |
| 1012 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); | 1012 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); |
| 1013 if (!UseFramePointer) { | 1013 if (!UseFramePointer) { |
| 1014 // If we don't need a Frame Pointer, this alloca has a known offset to the | 1014 // If we don't need a Frame Pointer, this alloca has a known offset to the |
| 1015 // stack pointer. We don't need to adjust the stack pointer, nor assign any | 1015 // stack pointer. We don't need to adjust the stack pointer, nor assign any |
| 1016 // value to Dest, as Dest is rematerializable. | 1016 // value to Dest, as Dest is rematerializable. |
| 1017 assert(Dest->isRematerializable()); | 1017 assert(Dest->isRematerializable()); |
| 1018 FixedAllocaSizeBytes += Value; | 1018 FixedAllocaSizeBytes += Value; |
| 1019 Context.insert(InstFakeDef::create(Func, Dest)); | 1019 Context.insert<InstFakeDef>(Dest); |
| 1020 } else { | 1020 } else { |
| 1021 _sub(esp, Ctx->getConstantInt32(Value)); | 1021 _sub(esp, Ctx->getConstantInt32(Value)); |
| 1022 } | 1022 } |
| 1023 } else { | 1023 } else { |
| 1024 // Non-constant sizes need to be adjusted to the next highest multiple of | 1024 // Non-constant sizes need to be adjusted to the next highest multiple of |
| 1025 // the required alignment at runtime. | 1025 // the required alignment at runtime. |
| 1026 Variable *T = makeReg(IceType_i32); | 1026 Variable *T = makeReg(IceType_i32); |
| 1027 _mov(T, TotalSize); | 1027 _mov(T, TotalSize); |
| 1028 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1028 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
| 1029 _and(T, Ctx->getConstantInt32(-Alignment)); | 1029 _and(T, Ctx->getConstantInt32(-Alignment)); |
| (...skipping 321 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1351 Context.insert(Label); | 1351 Context.insert(Label); |
| 1352 _mov(DestLo, T_2); | 1352 _mov(DestLo, T_2); |
| 1353 _mov(DestHi, T_3); | 1353 _mov(DestHi, T_3); |
| 1354 } | 1354 } |
| 1355 } | 1355 } |
| 1356 | 1356 |
| 1357 template <class Machine> | 1357 template <class Machine> |
| 1358 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 1358 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| 1359 Variable *Dest = Inst->getDest(); | 1359 Variable *Dest = Inst->getDest(); |
| 1360 if (Dest->isRematerializable()) { | 1360 if (Dest->isRematerializable()) { |
| 1361 Context.insert(InstFakeDef::create(Func, Dest)); | 1361 Context.insert<InstFakeDef>(Dest); |
| 1362 return; | 1362 return; |
| 1363 } | 1363 } |
| 1364 Type Ty = Dest->getType(); | 1364 Type Ty = Dest->getType(); |
| 1365 Operand *Src0 = legalize(Inst->getSrc(0)); | 1365 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 1366 Operand *Src1 = legalize(Inst->getSrc(1)); | 1366 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 1367 if (Inst->isCommutative()) { | 1367 if (Inst->isCommutative()) { |
| 1368 uint32_t SwapCount = 0; | 1368 uint32_t SwapCount = 0; |
| 1369 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) { | 1369 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) { |
| 1370 std::swap(Src0, Src1); | 1370 std::swap(Src0, Src1); |
| 1371 ++SwapCount; | 1371 ++SwapCount; |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1469 // The mul instruction cannot take an immediate operand. | 1469 // The mul instruction cannot take an immediate operand. |
| 1470 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); | 1470 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); |
| 1471 _mov(T_1, Src0Hi); | 1471 _mov(T_1, Src0Hi); |
| 1472 _imul(T_1, Src1Lo); | 1472 _imul(T_1, Src1Lo); |
| 1473 _mov(T_2, Src1Hi); | 1473 _mov(T_2, Src1Hi); |
| 1474 _imul(T_2, Src0Lo); | 1474 _imul(T_2, Src0Lo); |
| 1475 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); | 1475 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); |
| 1476 _mul(T_4Lo, T_3, Src1Lo); | 1476 _mul(T_4Lo, T_3, Src1Lo); |
| 1477 // The mul instruction produces two dest variables, edx:eax. We create a | 1477 // The mul instruction produces two dest variables, edx:eax. We create a |
| 1478 // fake definition of edx to account for this. | 1478 // fake definition of edx to account for this. |
| 1479 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | 1479 Context.insert<InstFakeDef>(T_4Hi, T_4Lo); |
| 1480 _mov(DestLo, T_4Lo); | 1480 _mov(DestLo, T_4Lo); |
| 1481 _add(T_4Hi, T_1); | 1481 _add(T_4Hi, T_1); |
| 1482 _add(T_4Hi, T_2); | 1482 _add(T_4Hi, T_2); |
| 1483 _mov(DestHi, T_4Hi); | 1483 _mov(DestHi, T_4Hi); |
| 1484 } break; | 1484 } break; |
| 1485 case InstArithmetic::Shl: | 1485 case InstArithmetic::Shl: |
| 1486 case InstArithmetic::Lshr: | 1486 case InstArithmetic::Lshr: |
| 1487 case InstArithmetic::Ashr: | 1487 case InstArithmetic::Ashr: |
| 1488 lowerShift64(Inst->getOp(), Src0Lo, Src0Hi, Src1Lo, DestLo, DestHi); | 1488 lowerShift64(Inst->getOp(), Src0Lo, Src0Hi, Src1Lo, DestLo, DestHi); |
| 1489 break; | 1489 break; |
| (...skipping 414 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1904 case InstArithmetic::Frem: | 1904 case InstArithmetic::Frem: |
| 1905 llvm::report_fatal_error("Helper call was expected"); | 1905 llvm::report_fatal_error("Helper call was expected"); |
| 1906 break; | 1906 break; |
| 1907 } | 1907 } |
| 1908 } | 1908 } |
| 1909 | 1909 |
| 1910 template <class Machine> | 1910 template <class Machine> |
| 1911 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | 1911 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { |
| 1912 Variable *Dest = Inst->getDest(); | 1912 Variable *Dest = Inst->getDest(); |
| 1913 if (Dest->isRematerializable()) { | 1913 if (Dest->isRematerializable()) { |
| 1914 Context.insert(InstFakeDef::create(Func, Dest)); | 1914 Context.insert<InstFakeDef>(Dest); |
| 1915 return; | 1915 return; |
| 1916 } | 1916 } |
| 1917 Operand *Src = Inst->getSrc(0); | 1917 Operand *Src = Inst->getSrc(0); |
| 1918 assert(Dest->getType() == Src->getType()); | 1918 assert(Dest->getType() == Src->getType()); |
| 1919 lowerMove(Dest, Src, false); | 1919 lowerMove(Dest, Src, false); |
| 1920 } | 1920 } |
| 1921 | 1921 |
| 1922 template <class Machine> | 1922 template <class Machine> |
| 1923 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) { | 1923 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) { |
| 1924 if (Br->isUnconditional()) { | 1924 if (Br->isUnconditional()) { |
| (...skipping 445 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2370 | 2370 |
| 2371 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 2371 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 2372 auto *SpillLo = Traits::VariableSplit::create( | 2372 auto *SpillLo = Traits::VariableSplit::create( |
| 2373 Func, Spill, Traits::VariableSplit::Low); | 2373 Func, Spill, Traits::VariableSplit::Low); |
| 2374 auto *SpillHi = Traits::VariableSplit::create( | 2374 auto *SpillHi = Traits::VariableSplit::create( |
| 2375 Func, Spill, Traits::VariableSplit::High); | 2375 Func, Spill, Traits::VariableSplit::High); |
| 2376 _mov(T_Lo, loOperand(Src0)); | 2376 _mov(T_Lo, loOperand(Src0)); |
| 2377 // Technically, the Spill is defined after the _store happens, but | 2377 // Technically, the Spill is defined after the _store happens, but |
| 2378 // SpillLo is considered a "use" of Spill so define Spill before it is | 2378 // SpillLo is considered a "use" of Spill so define Spill before it is |
| 2379 // used. | 2379 // used. |
| 2380 Context.insert(InstFakeDef::create(Func, Spill)); | 2380 Context.insert<InstFakeDef>(Spill); |
| 2381 _store(T_Lo, SpillLo); | 2381 _store(T_Lo, SpillLo); |
| 2382 _mov(T_Hi, hiOperand(Src0)); | 2382 _mov(T_Hi, hiOperand(Src0)); |
| 2383 _store(T_Hi, SpillHi); | 2383 _store(T_Hi, SpillHi); |
| 2384 _movq(Dest, Spill); | 2384 _movq(Dest, Spill); |
| 2385 } | 2385 } |
| 2386 } break; | 2386 } break; |
| 2387 case IceType_v8i1: { | 2387 case IceType_v8i1: { |
| 2388 llvm::report_fatal_error("Helper call was expected"); | 2388 llvm::report_fatal_error("Helper call was expected"); |
| 2389 } break; | 2389 } break; |
| 2390 case IceType_v16i1: { | 2390 case IceType_v16i1: { |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2443 } | 2443 } |
| 2444 | 2444 |
| 2445 if (InVectorElementTy == IceType_i32) { | 2445 if (InVectorElementTy == IceType_i32) { |
| 2446 _movd(ExtractedElementR, T); | 2446 _movd(ExtractedElementR, T); |
| 2447 } else { // Ty == IceType_f32 | 2447 } else { // Ty == IceType_f32 |
| 2448 // TODO(wala): _movss is only used here because _mov does not allow a | 2448 // TODO(wala): _movss is only used here because _mov does not allow a |
| 2449 // vector source and a scalar destination. _mov should be able to be | 2449 // vector source and a scalar destination. _mov should be able to be |
| 2450 // used here. | 2450 // used here. |
| 2451 // _movss is a binary instruction, so the FakeDef is needed to keep the | 2451 // _movss is a binary instruction, so the FakeDef is needed to keep the |
| 2452 // live range analysis consistent. | 2452 // live range analysis consistent. |
| 2453 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); | 2453 Context.insert<InstFakeDef>(ExtractedElementR); |
| 2454 _movss(ExtractedElementR, T); | 2454 _movss(ExtractedElementR, T); |
| 2455 } | 2455 } |
| 2456 } else { | 2456 } else { |
| 2457 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 2457 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
| 2458 // Spill the value to a stack slot and do the extraction in memory. | 2458 // Spill the value to a stack slot and do the extraction in memory. |
| 2459 // | 2459 // |
| 2460 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support | 2460 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
| 2461 // for legalizing to mem is implemented. | 2461 // for legalizing to mem is implemented. |
| 2462 Variable *Slot = Func->makeVariable(Ty); | 2462 Variable *Slot = Func->makeVariable(Ty); |
| 2463 Slot->setMustNotHaveReg(); | 2463 Slot->setMustNotHaveReg(); |
| (...skipping 415 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2879 switch (Condition) { | 2879 switch (Condition) { |
| 2880 default: | 2880 default: |
| 2881 llvm_unreachable("unexpected condition"); | 2881 llvm_unreachable("unexpected condition"); |
| 2882 break; | 2882 break; |
| 2883 case InstIcmp::Eq: | 2883 case InstIcmp::Eq: |
| 2884 case InstIcmp::Ule: | 2884 case InstIcmp::Ule: |
| 2885 // Mov Src0HiRM first, because it was legalized most recently, and will | 2885 // Mov Src0HiRM first, because it was legalized most recently, and will |
| 2886 // sometimes avoid a move before the OR. | 2886 // sometimes avoid a move before the OR. |
| 2887 _mov(Temp, Src0HiRM); | 2887 _mov(Temp, Src0HiRM); |
| 2888 _or(Temp, Src0LoRM); | 2888 _or(Temp, Src0LoRM); |
| 2889 Context.insert(InstFakeUse::create(Func, Temp)); | 2889 Context.insert<InstFakeUse>(Temp); |
| 2890 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer); | 2890 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer); |
| 2891 return; | 2891 return; |
| 2892 case InstIcmp::Ne: | 2892 case InstIcmp::Ne: |
| 2893 case InstIcmp::Ugt: | 2893 case InstIcmp::Ugt: |
| 2894 // Mov Src0HiRM first, because it was legalized most recently, and will | 2894 // Mov Src0HiRM first, because it was legalized most recently, and will |
| 2895 // sometimes avoid a move before the OR. | 2895 // sometimes avoid a move before the OR. |
| 2896 _mov(Temp, Src0HiRM); | 2896 _mov(Temp, Src0HiRM); |
| 2897 _or(Temp, Src0LoRM); | 2897 _or(Temp, Src0LoRM); |
| 2898 Context.insert(InstFakeUse::create(Func, Temp)); | 2898 Context.insert<InstFakeUse>(Temp); |
| 2899 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer); | 2899 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer); |
| 2900 return; | 2900 return; |
| 2901 case InstIcmp::Uge: | 2901 case InstIcmp::Uge: |
| 2902 movOrConsumer(true, Dest, Consumer); | 2902 movOrConsumer(true, Dest, Consumer); |
| 2903 return; | 2903 return; |
| 2904 case InstIcmp::Ult: | 2904 case InstIcmp::Ult: |
| 2905 movOrConsumer(false, Dest, Consumer); | 2905 movOrConsumer(false, Dest, Consumer); |
| 2906 return; | 2906 return; |
| 2907 case InstIcmp::Sgt: | 2907 case InstIcmp::Sgt: |
| 2908 break; | 2908 break; |
| (...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3053 case InstArithmetic::Or: | 3053 case InstArithmetic::Or: |
| 3054 _mov(T, Src0); | 3054 _mov(T, Src0); |
| 3055 _or(T, Src1); | 3055 _or(T, Src1); |
| 3056 break; | 3056 break; |
| 3057 } | 3057 } |
| 3058 | 3058 |
| 3059 if (Consumer == nullptr) { | 3059 if (Consumer == nullptr) { |
| 3060 llvm::report_fatal_error("Expected a consumer instruction"); | 3060 llvm::report_fatal_error("Expected a consumer instruction"); |
| 3061 } | 3061 } |
| 3062 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 3062 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 3063 Context.insert(InstFakeUse::create(Func, T)); | 3063 Context.insert<InstFakeUse>(T); |
| 3064 Context.insert(InstFakeDef::create(Func, Dest)); | 3064 Context.insert<InstFakeDef>(Dest); |
| 3065 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 3065 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| 3066 return; | 3066 return; |
| 3067 } | 3067 } |
| 3068 llvm::report_fatal_error("Unexpected consumer type"); | 3068 llvm::report_fatal_error("Unexpected consumer type"); |
| 3069 } | 3069 } |
| 3070 | 3070 |
| 3071 template <class Machine> | 3071 template <class Machine> |
| 3072 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | 3072 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
| 3073 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 3073 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 3074 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 3074 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
| (...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3283 // anyway, since this is x86-32 and integer arithmetic only happens on | 3283 // anyway, since this is x86-32 and integer arithmetic only happens on |
| 3284 // 32-bit quantities. | 3284 // 32-bit quantities. |
| 3285 Variable *T = makeReg(IceType_f64); | 3285 Variable *T = makeReg(IceType_f64); |
| 3286 typename Traits::X86OperandMem *Addr = | 3286 typename Traits::X86OperandMem *Addr = |
| 3287 formMemoryOperand(Instr->getArg(0), IceType_f64); | 3287 formMemoryOperand(Instr->getArg(0), IceType_f64); |
| 3288 _movq(T, Addr); | 3288 _movq(T, Addr); |
| 3289 // Then cast the bits back out of the XMM register to the i64 Dest. | 3289 // Then cast the bits back out of the XMM register to the i64 Dest. |
| 3290 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 3290 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
| 3291 lowerCast(Cast); | 3291 lowerCast(Cast); |
| 3292 // Make sure that the atomic load isn't elided when unused. | 3292 // Make sure that the atomic load isn't elided when unused. |
| 3293 Context.insert(InstFakeUse::create(Func, Dest64On32->getLo())); | 3293 Context.insert<InstFakeUse>(Dest64On32->getLo()); |
| 3294 Context.insert(InstFakeUse::create(Func, Dest64On32->getHi())); | 3294 Context.insert<InstFakeUse>(Dest64On32->getHi()); |
| 3295 return; | 3295 return; |
| 3296 } | 3296 } |
| 3297 } | 3297 } |
| 3298 auto *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | 3298 auto *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); |
| 3299 lowerLoad(Load); | 3299 lowerLoad(Load); |
| 3300 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 3300 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
| 3301 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert | 3301 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert |
| 3302 // the FakeUse on the last-inserted instruction's dest. | 3302 // the FakeUse on the last-inserted instruction's dest. |
| 3303 Context.insert( | 3303 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest()); |
| 3304 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | |
| 3305 return; | 3304 return; |
| 3306 } | 3305 } |
| 3307 case Intrinsics::AtomicRMW: | 3306 case Intrinsics::AtomicRMW: |
| 3308 if (!Intrinsics::isMemoryOrderValid( | 3307 if (!Intrinsics::isMemoryOrderValid( |
| 3309 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 3308 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
| 3310 Func->setError("Unexpected memory ordering for AtomicRMW"); | 3309 Func->setError("Unexpected memory ordering for AtomicRMW"); |
| 3311 return; | 3310 return; |
| 3312 } | 3311 } |
| 3313 lowerAtomicRMW( | 3312 lowerAtomicRMW( |
| 3314 Instr->getDest(), | 3313 Instr->getDest(), |
| (...skipping 518 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3833 } | 3832 } |
| 3834 constexpr bool Locked = true; | 3833 constexpr bool Locked = true; |
| 3835 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3834 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 3836 _br(Traits::Cond::Br_ne, Label); | 3835 _br(Traits::Cond::Br_ne, Label); |
| 3837 if (!IsXchg8b) { | 3836 if (!IsXchg8b) { |
| 3838 // If Val is a variable, model the extended live range of Val through | 3837 // If Val is a variable, model the extended live range of Val through |
| 3839 // the end of the loop, since it will be re-used by the loop. | 3838 // the end of the loop, since it will be re-used by the loop. |
| 3840 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3839 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3841 auto *ValLo = llvm::cast<Variable>(loOperand(ValVar)); | 3840 auto *ValLo = llvm::cast<Variable>(loOperand(ValVar)); |
| 3842 auto *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); | 3841 auto *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); |
| 3843 Context.insert(InstFakeUse::create(Func, ValLo)); | 3842 Context.insert<InstFakeUse>(ValLo); |
| 3844 Context.insert(InstFakeUse::create(Func, ValHi)); | 3843 Context.insert<InstFakeUse>(ValHi); |
| 3845 } | 3844 } |
| 3846 } else { | 3845 } else { |
| 3847 // For xchg, the loop is slightly smaller and ebx/ecx are used. | 3846 // For xchg, the loop is slightly smaller and ebx/ecx are used. |
| 3848 Context.insert(InstFakeUse::create(Func, T_ebx)); | 3847 Context.insert<InstFakeUse>(T_ebx); |
| 3849 Context.insert(InstFakeUse::create(Func, T_ecx)); | 3848 Context.insert<InstFakeUse>(T_ecx); |
| 3850 } | 3849 } |
| 3851 // The address base (if any) is also reused in the loop. | 3850 // The address base (if any) is also reused in the loop. |
| 3852 if (Variable *Base = Addr->getBase()) | 3851 if (Variable *Base = Addr->getBase()) |
| 3853 Context.insert(InstFakeUse::create(Func, Base)); | 3852 Context.insert<InstFakeUse>(Base); |
| 3854 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3853 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3855 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3854 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3856 _mov(DestLo, T_eax); | 3855 _mov(DestLo, T_eax); |
| 3857 _mov(DestHi, T_edx); | 3856 _mov(DestHi, T_edx); |
| 3858 return; | 3857 return; |
| 3859 } | 3858 } |
| 3860 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3859 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3861 int32_t Eax; | 3860 int32_t Eax; |
| 3862 switch (Ty) { | 3861 switch (Ty) { |
| 3863 default: | 3862 default: |
| 3864 llvm_unreachable("Bad type for atomicRMW"); | 3863 llvm_unreachable("Bad type for atomicRMW"); |
| 3865 // fallthrough | 3864 // fallthrough |
| 3866 case IceType_i32: | 3865 case IceType_i32: |
| 3867 Eax = Traits::RegisterSet::Reg_eax; | 3866 Eax = Traits::RegisterSet::Reg_eax; |
| 3868 break; | 3867 break; |
| 3869 case IceType_i16: | 3868 case IceType_i16: |
| 3870 Eax = Traits::RegisterSet::Reg_ax; | 3869 Eax = Traits::RegisterSet::Reg_ax; |
| 3871 break; | 3870 break; |
| 3872 case IceType_i8: | 3871 case IceType_i8: |
| 3873 Eax = Traits::RegisterSet::Reg_al; | 3872 Eax = Traits::RegisterSet::Reg_al; |
| 3874 break; | 3873 break; |
| 3875 } | 3874 } |
| 3876 Variable *T_eax = makeReg(Ty, Eax); | 3875 Variable *T_eax = makeReg(Ty, Eax); |
| 3877 _mov(T_eax, Addr); | 3876 _mov(T_eax, Addr); |
| 3878 typename Traits::Insts::Label *Label = | 3877 auto *Label = Context.insert<typename Traits::Insts::Label>(this); |
| 3879 Traits::Insts::Label::create(Func, this); | |
| 3880 Context.insert(Label); | |
| 3881 // We want to pick a different register for T than Eax, so don't use | 3878 // We want to pick a different register for T than Eax, so don't use |
| 3882 // _mov(T == nullptr, T_eax). | 3879 // _mov(T == nullptr, T_eax). |
| 3883 Variable *T = makeReg(Ty); | 3880 Variable *T = makeReg(Ty); |
| 3884 _mov(T, T_eax); | 3881 _mov(T, T_eax); |
| 3885 (this->*Op_Lo)(T, Val); | 3882 (this->*Op_Lo)(T, Val); |
| 3886 constexpr bool Locked = true; | 3883 constexpr bool Locked = true; |
| 3887 _cmpxchg(Addr, T_eax, T, Locked); | 3884 _cmpxchg(Addr, T_eax, T, Locked); |
| 3888 _br(Traits::Cond::Br_ne, Label); | 3885 _br(Traits::Cond::Br_ne, Label); |
| 3889 // If Val is a variable, model the extended live range of Val through | 3886 // If Val is a variable, model the extended live range of Val through |
| 3890 // the end of the loop, since it will be re-used by the loop. | 3887 // the end of the loop, since it will be re-used by the loop. |
| 3891 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3888 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3892 Context.insert(InstFakeUse::create(Func, ValVar)); | 3889 Context.insert<InstFakeUse>(ValVar); |
| 3893 } | 3890 } |
| 3894 // The address base (if any) is also reused in the loop. | 3891 // The address base (if any) is also reused in the loop. |
| 3895 if (Variable *Base = Addr->getBase()) | 3892 if (Variable *Base = Addr->getBase()) |
| 3896 Context.insert(InstFakeUse::create(Func, Base)); | 3893 Context.insert<InstFakeUse>(Base); |
| 3897 _mov(Dest, T_eax); | 3894 _mov(Dest, T_eax); |
| 3898 } | 3895 } |
| 3899 | 3896 |
| 3900 /// Lowers count {trailing, leading} zeros intrinsic. | 3897 /// Lowers count {trailing, leading} zeros intrinsic. |
| 3901 /// | 3898 /// |
| 3902 /// We could do constant folding here, but that should have | 3899 /// We could do constant folding here, but that should have |
| 3903 /// been done by the front-end/middle-end optimizations. | 3900 /// been done by the front-end/middle-end optimizations. |
| 3904 template <class Machine> | 3901 template <class Machine> |
| 3905 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | 3902 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, |
| 3906 Operand *FirstVal, | 3903 Operand *FirstVal, |
| (...skipping 746 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4653 // something else. We only care if it is Variable. | 4650 // something else. We only care if it is Variable. |
| 4654 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); | 4651 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); |
| 4655 if (Var == nullptr) | 4652 if (Var == nullptr) |
| 4656 return; | 4653 return; |
| 4657 // We use lowerStore() to copy out-args onto the stack. This creates a memory | 4654 // We use lowerStore() to copy out-args onto the stack. This creates a memory |
| 4658 // operand with the stack pointer as the base register. Don't do bounds | 4655 // operand with the stack pointer as the base register. Don't do bounds |
| 4659 // checks on that. | 4656 // checks on that. |
| 4660 if (Var->getRegNum() == Traits::RegisterSet::Reg_esp) | 4657 if (Var->getRegNum() == Traits::RegisterSet::Reg_esp) |
| 4661 return; | 4658 return; |
| 4662 | 4659 |
| 4663 typename Traits::Insts::Label *Label = | 4660 auto *Label = Traits::Insts::Label::create(Func, this); |
| 4664 Traits::Insts::Label::create(Func, this); | |
| 4665 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); | 4661 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); |
| 4666 _br(Traits::Cond::Br_e, Label); | 4662 _br(Traits::Cond::Br_e, Label); |
| 4667 _cmp(Opnd, Ctx->getConstantInt32(1)); | 4663 _cmp(Opnd, Ctx->getConstantInt32(1)); |
| 4668 _br(Traits::Cond::Br_e, Label); | 4664 _br(Traits::Cond::Br_e, Label); |
| 4669 Context.insert(Label); | 4665 Context.insert(Label); |
| 4670 } | 4666 } |
| 4671 | 4667 |
| 4672 template <class Machine> | 4668 template <class Machine> |
| 4673 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { | 4669 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { |
| 4674 // A Load instruction can be treated the same as an Assign instruction, after | 4670 // A Load instruction can be treated the same as an Assign instruction, after |
| (...skipping 29 matching lines...) Expand all Loading... |
| 4704 Constant *OffsetOp = nullptr; | 4700 Constant *OffsetOp = nullptr; |
| 4705 if (Relocatable == nullptr) { | 4701 if (Relocatable == nullptr) { |
| 4706 OffsetOp = Ctx->getConstantInt32(Offset); | 4702 OffsetOp = Ctx->getConstantInt32(Offset); |
| 4707 } else { | 4703 } else { |
| 4708 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, | 4704 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
| 4709 Relocatable->getName(), | 4705 Relocatable->getName(), |
| 4710 Relocatable->getSuppressMangling()); | 4706 Relocatable->getSuppressMangling()); |
| 4711 } | 4707 } |
| 4712 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, | 4708 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, |
| 4713 Index, Shift, SegmentReg); | 4709 Index, Shift, SegmentReg); |
| 4714 Context.insert(InstLoad::create(Func, Dest, Addr)); | 4710 Context.insert<InstLoad>(Dest, Addr); |
| 4715 } | 4711 } |
| 4716 } | 4712 } |
| 4717 | 4713 |
| 4718 template <class Machine> | 4714 template <class Machine> |
| 4719 void TargetX86Base<Machine>::randomlyInsertNop(float Probability, | 4715 void TargetX86Base<Machine>::randomlyInsertNop(float Probability, |
| 4720 RandomNumberGenerator &RNG) { | 4716 RandomNumberGenerator &RNG) { |
| 4721 RandomNumberGeneratorWrapper RNGW(RNG); | 4717 RandomNumberGeneratorWrapper RNGW(RNG); |
| 4722 if (RNGW.getTrueWithProbability(Probability)) { | 4718 if (RNGW.getTrueWithProbability(Probability)) { |
| 4723 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); | 4719 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); |
| 4724 } | 4720 } |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4768 | 4764 |
| 4769 template <class Machine> | 4765 template <class Machine> |
| 4770 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest, | 4766 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest, |
| 4771 typename Traits::Cond::BrCond Cond, | 4767 typename Traits::Cond::BrCond Cond, |
| 4772 Operand *SrcT, Operand *SrcF) { | 4768 Operand *SrcT, Operand *SrcF) { |
| 4773 Type DestTy = Dest->getType(); | 4769 Type DestTy = Dest->getType(); |
| 4774 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { | 4770 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { |
| 4775 // The cmov instruction doesn't allow 8-bit or FP operands, so we need | 4771 // The cmov instruction doesn't allow 8-bit or FP operands, so we need |
| 4776 // explicit control flow. | 4772 // explicit control flow. |
| 4777 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: | 4773 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
| 4778 typename Traits::Insts::Label *Label = | 4774 auto *Label = Traits::Insts::Label::create(Func, this); |
| 4779 Traits::Insts::Label::create(Func, this); | |
| 4780 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); | 4775 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); |
| 4781 _mov(Dest, SrcT); | 4776 _mov(Dest, SrcT); |
| 4782 _br(Cond, Label); | 4777 _br(Cond, Label); |
| 4783 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); | 4778 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); |
| 4784 _redefined(_mov(Dest, SrcF)); | 4779 _redefined(_mov(Dest, SrcF)); |
| 4785 Context.insert(Label); | 4780 Context.insert(Label); |
| 4786 return; | 4781 return; |
| 4787 } | 4782 } |
| 4788 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | 4783 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t |
| 4789 // But if SrcT is immediate, we might be able to do better, as the cmov | 4784 // But if SrcT is immediate, we might be able to do better, as the cmov |
| (...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5011 Constant *OffsetOp = nullptr; | 5006 Constant *OffsetOp = nullptr; |
| 5012 if (Relocatable == nullptr) { | 5007 if (Relocatable == nullptr) { |
| 5013 OffsetOp = Ctx->getConstantInt32(Offset); | 5008 OffsetOp = Ctx->getConstantInt32(Offset); |
| 5014 } else { | 5009 } else { |
| 5015 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, | 5010 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
| 5016 Relocatable->getName(), | 5011 Relocatable->getName(), |
| 5017 Relocatable->getSuppressMangling()); | 5012 Relocatable->getSuppressMangling()); |
| 5018 } | 5013 } |
| 5019 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, | 5014 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, |
| 5020 Index, Shift, SegmentReg); | 5015 Index, Shift, SegmentReg); |
| 5021 auto *NewStore = InstStore::create(Func, Data, Addr); | 5016 auto *NewStore = Context.insert<InstStore>(Data, Addr); |
| 5022 if (Inst->getDest()) | 5017 if (Inst->getDest()) |
| 5023 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | 5018 NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
| 5024 Context.insert(NewStore); | |
| 5025 } | 5019 } |
| 5026 } | 5020 } |
| 5027 | 5021 |
| 5028 template <class Machine> | 5022 template <class Machine> |
| 5029 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, | 5023 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, |
| 5030 uint64_t Min, uint64_t Max) { | 5024 uint64_t Min, uint64_t Max) { |
| 5031 // TODO(ascull): 64-bit should not reach here but only because it is not | 5025 // TODO(ascull): 64-bit should not reach here but only because it is not |
| 5032 // implemented yet. This should be able to handle the 64-bit case. | 5026 // implemented yet. This should be able to handle the 64-bit case. |
| 5033 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); | 5027 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); |
| 5034 // Subtracting 0 is a nop so don't do it | 5028 // Subtracting 0 is a nop so don't do it |
| (...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5266 Type Ty = Dest->getType(); | 5260 Type Ty = Dest->getType(); |
| 5267 Type ElementTy = typeElementType(Ty); | 5261 Type ElementTy = typeElementType(Ty); |
| 5268 SizeT NumElements = typeNumElements(Ty); | 5262 SizeT NumElements = typeNumElements(Ty); |
| 5269 | 5263 |
| 5270 Operand *T = Ctx->getConstantUndef(Ty); | 5264 Operand *T = Ctx->getConstantUndef(Ty); |
| 5271 for (SizeT I = 0; I < NumElements; ++I) { | 5265 for (SizeT I = 0; I < NumElements; ++I) { |
| 5272 Constant *Index = Ctx->getConstantInt32(I); | 5266 Constant *Index = Ctx->getConstantInt32(I); |
| 5273 | 5267 |
| 5274 // Extract the next two inputs. | 5268 // Extract the next two inputs. |
| 5275 Variable *Op0 = Func->makeVariable(ElementTy); | 5269 Variable *Op0 = Func->makeVariable(ElementTy); |
| 5276 Context.insert(InstExtractElement::create(Func, Op0, Src0, Index)); | 5270 Context.insert<InstExtractElement>(Op0, Src0, Index); |
| 5277 Variable *Op1 = Func->makeVariable(ElementTy); | 5271 Variable *Op1 = Func->makeVariable(ElementTy); |
| 5278 Context.insert(InstExtractElement::create(Func, Op1, Src1, Index)); | 5272 Context.insert<InstExtractElement>(Op1, Src1, Index); |
| 5279 | 5273 |
| 5280 // Perform the arithmetic as a scalar operation. | 5274 // Perform the arithmetic as a scalar operation. |
| 5281 Variable *Res = Func->makeVariable(ElementTy); | 5275 Variable *Res = Func->makeVariable(ElementTy); |
| 5282 auto *Arith = InstArithmetic::create(Func, Kind, Res, Op0, Op1); | 5276 auto *Arith = Context.insert<InstArithmetic>(Kind, Res, Op0, Op1); |
| 5283 Context.insert(Arith); | |
| 5284 // We might have created an operation that needed a helper call. | 5277 // We might have created an operation that needed a helper call. |
| 5285 genTargetHelperCallFor(Arith); | 5278 genTargetHelperCallFor(Arith); |
| 5286 | 5279 |
| 5287 // Insert the result into position. | 5280 // Insert the result into position. |
| 5288 Variable *DestT = Func->makeVariable(Ty); | 5281 Variable *DestT = Func->makeVariable(Ty); |
| 5289 Context.insert(InstInsertElement::create(Func, DestT, T, Res, Index)); | 5282 Context.insert<InstInsertElement>(DestT, T, Res, Index); |
| 5290 T = DestT; | 5283 T = DestT; |
| 5291 } | 5284 } |
| 5292 | 5285 |
| 5293 Context.insert(InstAssign::create(Func, Dest, T)); | 5286 Context.insert<InstAssign>(Dest, T); |
| 5294 } | 5287 } |
| 5295 | 5288 |
| 5296 /// The following pattern occurs often in lowered C and C++ code: | 5289 /// The following pattern occurs often in lowered C and C++ code: |
| 5297 /// | 5290 /// |
| 5298 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 5291 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 5299 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> | 5292 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 5300 /// | 5293 /// |
| 5301 /// We can eliminate the sext operation by copying the result of pcmpeqd, | 5294 /// We can eliminate the sext operation by copying the result of pcmpeqd, |
| 5302 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the | 5295 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the |
| 5303 /// sext operation. | 5296 /// sext operation. |
| (...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5574 break; | 5567 break; |
| 5575 case IceType_i16: | 5568 case IceType_i16: |
| 5576 assert(Src0->getType() == IceType_v16i1); | 5569 assert(Src0->getType() == IceType_v16i1); |
| 5577 HelperName = H_bitcast_16xi1_i16; | 5570 HelperName = H_bitcast_16xi1_i16; |
| 5578 break; | 5571 break; |
| 5579 case IceType_v8i1: { | 5572 case IceType_v8i1: { |
| 5580 assert(Src0->getType() == IceType_i8); | 5573 assert(Src0->getType() == IceType_i8); |
| 5581 HelperName = H_bitcast_i8_8xi1; | 5574 HelperName = H_bitcast_i8_8xi1; |
| 5582 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 5575 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
| 5583 // Arguments to functions are required to be at least 32 bits wide. | 5576 // Arguments to functions are required to be at least 32 bits wide. |
| 5584 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | 5577 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); |
| 5585 Src0 = Src0AsI32; | 5578 Src0 = Src0AsI32; |
| 5586 } break; | 5579 } break; |
| 5587 case IceType_v16i1: { | 5580 case IceType_v16i1: { |
| 5588 assert(Src0->getType() == IceType_i16); | 5581 assert(Src0->getType() == IceType_i16); |
| 5589 HelperName = H_bitcast_i16_16xi1; | 5582 HelperName = H_bitcast_i16_16xi1; |
| 5590 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 5583 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
| 5591 // Arguments to functions are required to be at least 32 bits wide. | 5584 // Arguments to functions are required to be at least 32 bits wide. |
| 5592 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | 5585 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); |
| 5593 Src0 = Src0AsI32; | 5586 Src0 = Src0AsI32; |
| 5594 } break; | 5587 } break; |
| 5595 } | 5588 } |
| 5596 } break; | 5589 } break; |
| 5597 } | 5590 } |
| 5598 constexpr SizeT MaxSrcs = 1; | 5591 constexpr SizeT MaxSrcs = 1; |
| 5599 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | 5592 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); |
| 5600 Call->addArg(Src0); | 5593 Call->addArg(Src0); |
| 5601 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | 5594 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); |
| 5602 Context.insert(Call); | 5595 Context.insert(Call); |
| (...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5717 case IceType_i1: | 5710 case IceType_i1: |
| 5718 case IceType_i8: | 5711 case IceType_i8: |
| 5719 case IceType_i16: | 5712 case IceType_i16: |
| 5720 case IceType_i32: | 5713 case IceType_i32: |
| 5721 case IceType_i64: | 5714 case IceType_i64: |
| 5722 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. | 5715 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. |
| 5723 _mov(Reg, Ctx->getConstantZero(Ty)); | 5716 _mov(Reg, Ctx->getConstantZero(Ty)); |
| 5724 break; | 5717 break; |
| 5725 case IceType_f32: | 5718 case IceType_f32: |
| 5726 case IceType_f64: | 5719 case IceType_f64: |
| 5727 Context.insert(InstFakeDef::create(Func, Reg)); | 5720 Context.insert<InstFakeDef>(Reg); |
| 5728 _xorps(Reg, Reg); | 5721 _xorps(Reg, Reg); |
| 5729 break; | 5722 break; |
| 5730 default: | 5723 default: |
| 5731 // All vector types use the same pxor instruction. | 5724 // All vector types use the same pxor instruction. |
| 5732 assert(isVectorType(Ty)); | 5725 assert(isVectorType(Ty)); |
| 5733 Context.insert(InstFakeDef::create(Func, Reg)); | 5726 Context.insert<InstFakeDef>(Reg); |
| 5734 _pxor(Reg, Reg); | 5727 _pxor(Reg, Reg); |
| 5735 break; | 5728 break; |
| 5736 } | 5729 } |
| 5737 return Reg; | 5730 return Reg; |
| 5738 } | 5731 } |
| 5739 | 5732 |
| 5740 // There is no support for loading or emitting vector constants, so the vector | 5733 // There is no support for loading or emitting vector constants, so the vector |
| 5741 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are | 5734 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are |
| 5742 // initialized with register operations. | 5735 // initialized with register operations. |
| 5743 // | 5736 // |
| 5744 // TODO(wala): Add limited support for vector constants so that complex | 5737 // TODO(wala): Add limited support for vector constants so that complex |
| 5745 // initialization in registers is unnecessary. | 5738 // initialization in registers is unnecessary. |
| 5746 | 5739 |
| 5747 template <class Machine> | 5740 template <class Machine> |
| 5748 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 5741 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
| 5749 return makeZeroedRegister(Ty, RegNum); | 5742 return makeZeroedRegister(Ty, RegNum); |
| 5750 } | 5743 } |
| 5751 | 5744 |
| 5752 template <class Machine> | 5745 template <class Machine> |
| 5753 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, | 5746 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, |
| 5754 int32_t RegNum) { | 5747 int32_t RegNum) { |
| 5755 Variable *MinusOnes = makeReg(Ty, RegNum); | 5748 Variable *MinusOnes = makeReg(Ty, RegNum); |
| 5756 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 5749 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
| 5757 Context.insert(InstFakeDef::create(Func, MinusOnes)); | 5750 Context.insert<InstFakeDef>(MinusOnes); |
| 5758 _pcmpeq(MinusOnes, MinusOnes); | 5751 _pcmpeq(MinusOnes, MinusOnes); |
| 5759 return MinusOnes; | 5752 return MinusOnes; |
| 5760 } | 5753 } |
| 5761 | 5754 |
| 5762 template <class Machine> | 5755 template <class Machine> |
| 5763 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 5756 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
| 5764 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 5757 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
| 5765 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 5758 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 5766 _psub(Dest, MinusOne); | 5759 _psub(Dest, MinusOne); |
| 5767 return Dest; | 5760 return Dest; |
| (...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6057 template <class Machine> | 6050 template <class Machine> |
| 6058 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { | 6051 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { |
| 6059 Type Ty = From->getType(); | 6052 Type Ty = From->getType(); |
| 6060 if (llvm::isa<ConstantUndef>(From)) { | 6053 if (llvm::isa<ConstantUndef>(From)) { |
| 6061 // Lower undefs to zero. Another option is to lower undefs to an | 6054 // Lower undefs to zero. Another option is to lower undefs to an |
| 6062 // uninitialized register; however, using an uninitialized register results | 6055 // uninitialized register; however, using an uninitialized register results |
| 6063 // in less predictable code. | 6056 // in less predictable code. |
| 6064 // | 6057 // |
| 6065 // If in the future the implementation is changed to lower undef values to | 6058 // If in the future the implementation is changed to lower undef values to |
| 6066 // uninitialized registers, a FakeDef will be needed: | 6059 // uninitialized registers, a FakeDef will be needed: |
| 6067 // Context.insert(InstFakeDef::create(Func, Reg)); | 6060 // Context.insert<InstFakeDef>(Reg); |
| 6068 // This is in order to ensure that the live range of Reg is not | 6061 // This is in order to ensure that the live range of Reg is not |
| 6069 // overestimated. If the constant being lowered is a 64 bit value, then | 6062 // overestimated. If the constant being lowered is a 64 bit value, then |
| 6070 // the result should be split and the lo and hi components will need to go | 6063 // the result should be split and the lo and hi components will need to go |
| 6071 // in uninitialized registers. | 6064 // in uninitialized registers. |
| 6072 if (isVectorType(Ty)) | 6065 if (isVectorType(Ty)) |
| 6073 return makeVectorOfZeros(Ty, RegNum); | 6066 return makeVectorOfZeros(Ty, RegNum); |
| 6074 return Ctx->getConstantZero(Ty); | 6067 return Ctx->getConstantZero(Ty); |
| 6075 } | 6068 } |
| 6076 return From; | 6069 return From; |
| 6077 } | 6070 } |
| (...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6410 } | 6403 } |
| 6411 // the offset is not eligible for blinding or pooling, return the original | 6404 // the offset is not eligible for blinding or pooling, return the original |
| 6412 // mem operand | 6405 // mem operand |
| 6413 return MemOperand; | 6406 return MemOperand; |
| 6414 } | 6407 } |
| 6415 | 6408 |
| 6416 } // end of namespace X86Internal | 6409 } // end of namespace X86Internal |
| 6417 } // end of namespace Ice | 6410 } // end of namespace Ice |
| 6418 | 6411 |
| 6419 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6412 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |