Chromium Code Reviews
| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 285 matching lines...) | |
| 296 } | 296 } |
| 297 | 297 |
| 298 template <class Machine> void TargetX86Base<Machine>::staticInit() { | 298 template <class Machine> void TargetX86Base<Machine>::staticInit() { |
| 299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); | 299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); |
| 300 } | 300 } |
| 301 | 301 |
| 302 template <class Machine> void TargetX86Base<Machine>::translateO2() { | 302 template <class Machine> void TargetX86Base<Machine>::translateO2() { |
| 303 TimerMarker T(TimerStack::TT_O2, Func); | 303 TimerMarker T(TimerStack::TT_O2, Func); |
| 304 | 304 |
| 305 genTargetHelperCalls(); | 305 genTargetHelperCalls(); |
| 306 Func->dump("After target helper call insertion"); | |
| 306 | 307 |
| 307 // Merge Alloca instructions, and lay out the stack. | 308 // Merge Alloca instructions, and lay out the stack. |
| 308 static constexpr bool SortAndCombineAllocas = true; | 309 static constexpr bool SortAndCombineAllocas = true; |
| 309 Func->processAllocas(SortAndCombineAllocas); | 310 Func->processAllocas(SortAndCombineAllocas); |
| 310 Func->dump("After Alloca processing"); | 311 Func->dump("After Alloca processing"); |
| 311 | 312 |
| 312 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 313 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
| 313 // Lower Phi instructions. | 314 // Lower Phi instructions. |
| 314 Func->placePhiLoads(); | 315 Func->placePhiLoads(); |
| 315 if (Func->hasError()) | 316 if (Func->hasError()) |
| (...skipping 635 matching lines...) | |
| 951 | 952 |
| 952 template <class Machine> | 953 template <class Machine> |
| 953 llvm::SmallBitVector | 954 llvm::SmallBitVector |
| 954 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, | 955 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, |
| 955 RegSetMask Exclude) const { | 956 RegSetMask Exclude) const { |
| 956 return Traits::getRegisterSet(Include, Exclude); | 957 return Traits::getRegisterSet(Include, Exclude); |
| 957 } | 958 } |
| 958 | 959 |
| 959 template <class Machine> | 960 template <class Machine> |
| 960 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { | 961 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { |
| 961 if (!Inst->getKnownFrameOffset()) | |
| 962 setHasFramePointer(); | |
| 963 // Conservatively require the stack to be aligned. Some stack adjustment | 962 // Conservatively require the stack to be aligned. Some stack adjustment |
| 964 // operations implemented below assume that the stack is aligned before the | 963 // operations implemented below assume that the stack is aligned before the |
| 965 // alloca. All the alloca code ensures that the stack alignment is preserved | 964 // alloca. All the alloca code ensures that the stack alignment is preserved |
| 966 // after the alloca. The stack alignment restriction can be relaxed in some | 965 // after the alloca. The stack alignment restriction can be relaxed in some |
| 967 // cases. | 966 // cases. |
| 968 NeedsStackAlignment = true; | 967 NeedsStackAlignment = true; |
| 969 | 968 |
| 970 // TODO(stichnot): minimize the number of adjustments of esp, etc. | |
| 971 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 972 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | |
| 973 Variable *Dest = Inst->getDest(); | |
| 974 uint32_t AlignmentParam = Inst->getAlignInBytes(); | |
| 975 // For default align=0, set it to the real value 1, to avoid any | 969 // For default align=0, set it to the real value 1, to avoid any |
| 976 // bit-manipulation problems below. | 970 // bit-manipulation problems below. |
| 977 AlignmentParam = std::max(AlignmentParam, 1u); | 971 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes()); |
| 978 | 972 |
| 979 // LLVM enforces power of 2 alignment. | 973 // LLVM enforces power of 2 alignment. |
| 980 assert(llvm::isPowerOf2_32(AlignmentParam)); | 974 assert(llvm::isPowerOf2_32(AlignmentParam)); |
| 981 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); | 975 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); |
| 982 | 976 |
| 983 uint32_t Alignment = | 977 const uint32_t Alignment = |
| 984 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); | 978 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
| 985 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { | 979 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; |
| 980 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; | |
| 981 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset(); | |
| 982 const bool UseFramePointer = | |
| 983 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; | |
| 984 | |
| 985 if (UseFramePointer) | |
| 986 setHasFramePointer(); | 986 setHasFramePointer(); |
| 987 | |
| 988 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 989 if (OverAligned) { | |
| 987 _and(esp, Ctx->getConstantInt32(-Alignment)); | 990 _and(esp, Ctx->getConstantInt32(-Alignment)); |
| 988 } | 991 } |
| 992 | |
| 993 Variable *Dest = Inst->getDest(); | |
| 994 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | |
| 995 | |
| 989 if (const auto *ConstantTotalSize = | 996 if (const auto *ConstantTotalSize = |
| 990 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 997 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
| 991 uint32_t Value = ConstantTotalSize->getValue(); | 998 const uint32_t Value = |
| 992 Value = Utils::applyAlignment(Value, Alignment); | 999 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment); |
| 993 if (Inst->getKnownFrameOffset()) { | 1000 if (!UseFramePointer) { |
| 994 _adjust_stack(Value); | 1001 // If we don't need a Frame Pointer, this alloca has a known offset to the |
| 1002 // stack pointer. We don't need to adjust the stack pointer, nor assign any |
| 1003 // value to Dest, as Dest is rematerializable. | |
| 1004 assert(Dest->isRematerializable()); | |
| 995 FixedAllocaSizeBytes += Value; | 1005 FixedAllocaSizeBytes += Value; |
| 1006 Context.insert(InstFakeDef::create(Func, Dest)); | |
| 996 } else { | 1007 } else { |
| 997 _sub(esp, Ctx->getConstantInt32(Value)); | 1008 _sub(esp, Ctx->getConstantInt32(Value)); |
| 998 } | 1009 } |
| 999 } else { | 1010 } else { |
| 1000 // Non-constant sizes need to be adjusted to the next highest multiple of | 1011 // Non-constant sizes need to be adjusted to the next highest multiple of |
| 1001 // the required alignment at runtime. | 1012 // the required alignment at runtime. |
| 1002 Variable *T = makeReg(IceType_i32); | 1013 Variable *T = makeReg(IceType_i32); |
| 1003 _mov(T, TotalSize); | 1014 _mov(T, TotalSize); |
| 1004 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1015 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
| 1005 _and(T, Ctx->getConstantInt32(-Alignment)); | 1016 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 1006 _sub(esp, T); | 1017 _sub(esp, T); |
| 1007 } | 1018 } |
| 1008 _mov(Dest, esp); | 1019 // Add enough to the returned address to account for the out args area. |
| 1020 uint32_t OutArgsSize = maxOutArgsSizeBytes(); | |
| 1021 if (OutArgsSize > 0) { | |
| 1022 Variable *T = makeReg(IceType_i32); | |
| 1023 typename Traits::X86OperandMem *CalculateOperand = | |
| 1024 Traits::X86OperandMem::create( | |
| 1025 Func, IceType_i32, esp, | |
| 1026 Ctx->getConstantInt(IceType_i32, OutArgsSize)); | |
| 1027 _lea(T, CalculateOperand); | |
| 1028 _mov(Dest, T); | |
| 1029 } else { | |
| 1030 _mov(Dest, esp); | |
| 1031 } | |
| 1009 } | 1032 } |
| 1010 | 1033 |
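For the non-constant path in lowerAlloca above, the `_add`/`_and` pair rounds the requested size up to the next multiple of the (power-of-two) alignment, and the trailing `lea` offsets the returned address past the out-args area. A minimal standalone sketch of the rounding arithmetic, assuming nothing beyond two's-complement math (illustrative C++, not Subzero code):

```cpp
#include <cassert>
#include <cstdint>

// Round SizeBytes up to the next multiple of a power-of-two Alignment.
// This is the same value the lowering computes in a register with
// _add(T, Alignment - 1) followed by _and(T, -Alignment), since
// -Alignment == ~(Alignment - 1) in two's complement.
uint32_t roundUpToAlignment(uint32_t SizeBytes, uint32_t Alignment) {
  assert((Alignment & (Alignment - 1)) == 0 && "Alignment must be a power of 2");
  return (SizeBytes + Alignment - 1) & ~(Alignment - 1);
}
// Example: roundUpToAlignment(13, 16) == 16.
```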
| 1011 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1034 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
| 1012 /// narrower) for certain constants. The lea instruction can be used to multiply | 1035 /// narrower) for certain constants. The lea instruction can be used to multiply |
| 1013 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of | 1036 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of |
| 1014 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1037 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
| 1015 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1038 /// lea-based multiplies by 5, combined with left-shifting by 2. |
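A rough illustration of the decomposition that comment describes, writing the lea/shl steps as equivalent C++ (a sketch only; the elided function body below contains the actual selection logic):

```cpp
#include <cstdint>

// Multiply by 100 without an imul: 100 = 5 * 5 * 4, so two lea-style
// multiplies by 5 followed by a shift by 2.
uint32_t mulBy100(uint32_t X) {
  uint32_t T = X + 4 * X; // lea t, [x + 4*x]  ; t = 5*x
  T = T + 4 * T;          // lea t, [t + 4*t]  ; t = 25*x
  return T << 2;          // shl t, 2          ; t = 100*x
}
```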
| 1016 template <class Machine> | 1039 template <class Machine> |
| 1017 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1040 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1018 int32_t Src1) { | 1041 int32_t Src1) { |
| (...skipping 329 matching lines...) | |
| 1348 (void)SwapCount; | 1371 (void)SwapCount; |
| 1349 } | 1372 } |
| 1350 if (!Traits::Is64Bit && Ty == IceType_i64) { | 1373 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 1351 // These x86-32 helper-call-involved instructions are lowered in this | 1374 // These x86-32 helper-call-involved instructions are lowered in this |
| 1352 // separate switch. This is because loOperand() and hiOperand() may insert | 1375 // separate switch. This is because loOperand() and hiOperand() may insert |
| 1353 // redundant instructions for constant blinding and pooling. Such redundant | 1376 // redundant instructions for constant blinding and pooling. Such redundant |
| 1354 // instructions will fail liveness analysis under -Om1 setting. And, | 1377 // instructions will fail liveness analysis under -Om1 setting. And, |
| 1355 // actually these arguments do not need to be processed with loOperand() | 1378 // actually these arguments do not need to be processed with loOperand() |
| 1356 // and hiOperand() to be used. | 1379 // and hiOperand() to be used. |
| 1357 switch (Inst->getOp()) { | 1380 switch (Inst->getOp()) { |
| 1358 case InstArithmetic::Udiv: { | 1381 case InstArithmetic::Udiv: |
| 1359 constexpr SizeT MaxSrcs = 2; | 1382 case InstArithmetic::Sdiv: |
| 1360 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | 1383 case InstArithmetic::Urem: |
| 1361 Call->addArg(Inst->getSrc(0)); | 1384 case InstArithmetic::Srem: |
| 1362 Call->addArg(Inst->getSrc(1)); | 1385 llvm::report_fatal_error("Helper call was expected"); |
| 1363 lowerCall(Call); | |
| 1364 return; | 1386 return; |
| 1365 } | |
| 1366 case InstArithmetic::Sdiv: { | |
| 1367 constexpr SizeT MaxSrcs = 2; | |
| 1368 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); | |
| 1369 Call->addArg(Inst->getSrc(0)); | |
| 1370 Call->addArg(Inst->getSrc(1)); | |
| 1371 lowerCall(Call); | |
| 1372 return; | |
| 1373 } | |
| 1374 case InstArithmetic::Urem: { | |
| 1375 constexpr SizeT MaxSrcs = 2; | |
| 1376 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); | |
| 1377 Call->addArg(Inst->getSrc(0)); | |
| 1378 Call->addArg(Inst->getSrc(1)); | |
| 1379 lowerCall(Call); | |
| 1380 return; | |
| 1381 } | |
| 1382 case InstArithmetic::Srem: { | |
| 1383 constexpr SizeT MaxSrcs = 2; | |
| 1384 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); | |
| 1385 Call->addArg(Inst->getSrc(0)); | |
| 1386 Call->addArg(Inst->getSrc(1)); | |
| 1387 lowerCall(Call); | |
| 1388 return; | |
| 1389 } | |
| 1390 default: | 1387 default: |
| 1391 break; | 1388 break; |
| 1392 } | 1389 } |
| 1393 | 1390 |
| 1394 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1391 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1395 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1392 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1396 Operand *Src0Lo = loOperand(Src0); | 1393 Operand *Src0Lo = loOperand(Src0); |
| 1397 Operand *Src0Hi = hiOperand(Src0); | 1394 Operand *Src0Hi = hiOperand(Src0); |
| 1398 Operand *Src1Lo = loOperand(Src1); | 1395 Operand *Src1Lo = loOperand(Src1); |
| 1399 Operand *Src1Hi = hiOperand(Src1); | 1396 Operand *Src1Hi = hiOperand(Src1); |
| (...skipping 174 matching lines...) | |
| 1574 Variable *T4 = makeReg(IceType_v4i32); | 1571 Variable *T4 = makeReg(IceType_v4i32); |
| 1575 _movp(T1, Src0); | 1572 _movp(T1, Src0); |
| 1576 _pshufd(T2, Src0, Mask1030); | 1573 _pshufd(T2, Src0, Mask1030); |
| 1577 _pshufd(T3, Src1, Mask1030); | 1574 _pshufd(T3, Src1, Mask1030); |
| 1578 _pmuludq(T1, Src1); | 1575 _pmuludq(T1, Src1); |
| 1579 _pmuludq(T2, T3); | 1576 _pmuludq(T2, T3); |
| 1580 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); | 1577 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); |
| 1581 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); | 1578 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); |
| 1582 _movp(Dest, T4); | 1579 _movp(Dest, T4); |
| 1583 } else if (Ty == IceType_v16i8) { | 1580 } else if (Ty == IceType_v16i8) { |
| 1584 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1581 llvm::report_fatal_error("Scalarized operation was expected"); |
| 1585 } else { | 1582 } else { |
| 1586 llvm::report_fatal_error("Invalid vector multiply type"); | 1583 llvm::report_fatal_error("Invalid vector multiply type"); |
| 1587 } | 1584 } |
| 1588 } break; | 1585 } break; |
| 1589 case InstArithmetic::Shl: | 1586 case InstArithmetic::Shl: |
| 1590 case InstArithmetic::Lshr: | 1587 case InstArithmetic::Lshr: |
| 1591 case InstArithmetic::Ashr: | 1588 case InstArithmetic::Ashr: |
| 1592 case InstArithmetic::Udiv: | 1589 case InstArithmetic::Udiv: |
| 1593 case InstArithmetic::Urem: | 1590 case InstArithmetic::Urem: |
| 1594 case InstArithmetic::Sdiv: | 1591 case InstArithmetic::Sdiv: |
| 1595 case InstArithmetic::Srem: | 1592 case InstArithmetic::Srem: |
| 1596 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1593 llvm::report_fatal_error("Scalarized operation was expected"); |
| 1597 break; | 1594 break; |
| 1598 case InstArithmetic::Fadd: { | 1595 case InstArithmetic::Fadd: { |
| 1599 Variable *T = makeReg(Ty); | 1596 Variable *T = makeReg(Ty); |
| 1600 _movp(T, Src0); | 1597 _movp(T, Src0); |
| 1601 _addps(T, Src1); | 1598 _addps(T, Src1); |
| 1602 _movp(Dest, T); | 1599 _movp(Dest, T); |
| 1603 } break; | 1600 } break; |
| 1604 case InstArithmetic::Fsub: { | 1601 case InstArithmetic::Fsub: { |
| 1605 Variable *T = makeReg(Ty); | 1602 Variable *T = makeReg(Ty); |
| 1606 _movp(T, Src0); | 1603 _movp(T, Src0); |
| 1607 _subps(T, Src1); | 1604 _subps(T, Src1); |
| 1608 _movp(Dest, T); | 1605 _movp(Dest, T); |
| 1609 } break; | 1606 } break; |
| 1610 case InstArithmetic::Fmul: { | 1607 case InstArithmetic::Fmul: { |
| 1611 Variable *T = makeReg(Ty); | 1608 Variable *T = makeReg(Ty); |
| 1612 _movp(T, Src0); | 1609 _movp(T, Src0); |
| 1613 _mulps(T, Src0 == Src1 ? T : Src1); | 1610 _mulps(T, Src0 == Src1 ? T : Src1); |
| 1614 _movp(Dest, T); | 1611 _movp(Dest, T); |
| 1615 } break; | 1612 } break; |
| 1616 case InstArithmetic::Fdiv: { | 1613 case InstArithmetic::Fdiv: { |
| 1617 Variable *T = makeReg(Ty); | 1614 Variable *T = makeReg(Ty); |
| 1618 _movp(T, Src0); | 1615 _movp(T, Src0); |
| 1619 _divps(T, Src1); | 1616 _divps(T, Src1); |
| 1620 _movp(Dest, T); | 1617 _movp(Dest, T); |
| 1621 } break; | 1618 } break; |
| 1622 case InstArithmetic::Frem: | 1619 case InstArithmetic::Frem: |
| 1623 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1620 llvm::report_fatal_error("Scalarized operation was expected"); |
| 1624 break; | 1621 break; |
| 1625 } | 1622 } |
| 1626 return; | 1623 return; |
| 1627 } | 1624 } |
| 1628 Variable *T_edx = nullptr; | 1625 Variable *T_edx = nullptr; |
| 1629 Variable *T = nullptr; | 1626 Variable *T = nullptr; |
| 1630 switch (Inst->getOp()) { | 1627 switch (Inst->getOp()) { |
| 1631 case InstArithmetic::_num: | 1628 case InstArithmetic::_num: |
| 1632 llvm_unreachable("Unknown arithmetic operator"); | 1629 llvm_unreachable("Unknown arithmetic operator"); |
| 1633 break; | 1630 break; |
| (...skipping 250 matching lines...) | |
| 1884 case InstArithmetic::Fmul: | 1881 case InstArithmetic::Fmul: |
| 1885 _mov(T, Src0); | 1882 _mov(T, Src0); |
| 1886 _mulss(T, Src0 == Src1 ? T : Src1); | 1883 _mulss(T, Src0 == Src1 ? T : Src1); |
| 1887 _mov(Dest, T); | 1884 _mov(Dest, T); |
| 1888 break; | 1885 break; |
| 1889 case InstArithmetic::Fdiv: | 1886 case InstArithmetic::Fdiv: |
| 1890 _mov(T, Src0); | 1887 _mov(T, Src0); |
| 1891 _divss(T, Src1); | 1888 _divss(T, Src1); |
| 1892 _mov(Dest, T); | 1889 _mov(Dest, T); |
| 1893 break; | 1890 break; |
| 1894 case InstArithmetic::Frem: { | 1891 case InstArithmetic::Frem: |
| 1895 constexpr SizeT MaxSrcs = 2; | 1892 llvm::report_fatal_error("Helper call was expected"); |
| 1896 InstCall *Call = makeHelperCall( | 1893 break; |
| 1897 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | |
| 1898 Call->addArg(Src0); | |
| 1899 Call->addArg(Src1); | |
| 1900 return lowerCall(Call); | |
| 1901 } | |
| 1902 } | 1894 } |
| 1903 } | 1895 } |
| 1904 | 1896 |
| 1905 template <class Machine> | 1897 template <class Machine> |
| 1906 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | 1898 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { |
| 1907 Variable *Dest = Inst->getDest(); | 1899 Variable *Dest = Inst->getDest(); |
| 1908 if (Dest->isRematerializable()) { | 1900 if (Dest->isRematerializable()) { |
| 1909 Context.insert(InstFakeDef::create(Func, Dest)); | 1901 Context.insert(InstFakeDef::create(Func, Dest)); |
| 1910 return; | 1902 return; |
| 1911 } | 1903 } |
| (...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2154 if (isVectorType(DestTy)) { | 2146 if (isVectorType(DestTy)) { |
| 2155 assert(DestTy == IceType_v4i32 && | 2147 assert(DestTy == IceType_v4i32 && |
| 2156 Inst->getSrc(0)->getType() == IceType_v4f32); | 2148 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2157 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2149 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2158 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2150 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2159 Src0RM = legalizeToReg(Src0RM); | 2151 Src0RM = legalizeToReg(Src0RM); |
| 2160 Variable *T = makeReg(DestTy); | 2152 Variable *T = makeReg(DestTy); |
| 2161 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2153 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
| 2162 _movp(Dest, T); | 2154 _movp(Dest, T); |
| 2163 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { | 2155 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 2164 constexpr SizeT MaxSrcs = 1; | 2156 llvm::report_fatal_error("Helper call was expected"); |
| 2165 Type SrcType = Inst->getSrc(0)->getType(); | |
| 2166 InstCall *Call = | |
| 2167 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | |
| 2168 : H_fptosi_f64_i64, | |
| 2169 Dest, MaxSrcs); | |
| 2170 Call->addArg(Inst->getSrc(0)); | |
| 2171 lowerCall(Call); | |
| 2172 } else { | 2157 } else { |
| 2173 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2158 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2174 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2159 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| 2175 Variable *T_1 = nullptr; | 2160 Variable *T_1 = nullptr; |
| 2176 if (Traits::Is64Bit && DestTy == IceType_i64) { | 2161 if (Traits::Is64Bit && DestTy == IceType_i64) { |
| 2177 T_1 = makeReg(IceType_i64); | 2162 T_1 = makeReg(IceType_i64); |
| 2178 } else { | 2163 } else { |
| 2179 assert(DestTy != IceType_i64); | 2164 assert(DestTy != IceType_i64); |
| 2180 T_1 = makeReg(IceType_i32); | 2165 T_1 = makeReg(IceType_i32); |
| 2181 } | 2166 } |
| 2182 // cvt() requires its integer argument to be a GPR. | 2167 // cvt() requires its integer argument to be a GPR. |
| 2183 Variable *T_2 = makeReg(DestTy); | 2168 Variable *T_2 = makeReg(DestTy); |
| 2184 if (isByteSizedType(DestTy)) { | 2169 if (isByteSizedType(DestTy)) { |
| 2185 assert(T_1->getType() == IceType_i32); | 2170 assert(T_1->getType() == IceType_i32); |
| 2186 T_1->setRegClass(RCX86_Is32To8); | 2171 T_1->setRegClass(RCX86_Is32To8); |
| 2187 T_2->setRegClass(RCX86_IsTrunc8Rcvr); | 2172 T_2->setRegClass(RCX86_IsTrunc8Rcvr); |
| 2188 } | 2173 } |
| 2189 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); | 2174 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); |
| 2190 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2175 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
| 2191 if (DestTy == IceType_i1) | 2176 if (DestTy == IceType_i1) |
| 2192 _and(T_2, Ctx->getConstantInt1(1)); | 2177 _and(T_2, Ctx->getConstantInt1(1)); |
| 2193 _mov(Dest, T_2); | 2178 _mov(Dest, T_2); |
| 2194 } | 2179 } |
| 2195 break; | 2180 break; |
| 2196 case InstCast::Fptoui: | 2181 case InstCast::Fptoui: |
| 2197 if (isVectorType(DestTy)) { | 2182 if (isVectorType(DestTy)) { |
| 2198 assert(DestTy == IceType_v4i32 && | 2183 llvm::report_fatal_error("Helper call was expected"); |
| 2199 Inst->getSrc(0)->getType() == IceType_v4f32); | |
| 2200 constexpr SizeT MaxSrcs = 1; | |
| 2201 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | |
| 2202 Call->addArg(Inst->getSrc(0)); | |
| 2203 lowerCall(Call); | |
| 2204 } else if (DestTy == IceType_i64 || | 2184 } else if (DestTy == IceType_i64 || |
| 2205 (!Traits::Is64Bit && DestTy == IceType_i32)) { | 2185 (!Traits::Is64Bit && DestTy == IceType_i32)) { |
| 2206 // Use a helper for both x86-32 and x86-64. | 2186 llvm::report_fatal_error("Helper call was expected"); |
| 2207 constexpr SizeT MaxSrcs = 1; | |
| 2208 Type SrcType = Inst->getSrc(0)->getType(); | |
| 2209 IceString TargetString; | |
| 2210 if (Traits::Is64Bit) { | |
| 2211 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | |
| 2212 : H_fptoui_f64_i64; | |
| 2213 } else if (isInt32Asserting32Or64(DestTy)) { | |
| 2214 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | |
| 2215 : H_fptoui_f64_i32; | |
| 2216 } else { | |
| 2217 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | |
| 2218 : H_fptoui_f64_i64; | |
| 2219 } | |
| 2220 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | |
| 2221 Call->addArg(Inst->getSrc(0)); | |
| 2222 lowerCall(Call); | |
| 2223 return; | |
| 2224 } else { | 2187 } else { |
| 2225 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2188 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2226 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2189 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| 2227 assert(DestTy != IceType_i64); | 2190 assert(DestTy != IceType_i64); |
| 2228 Variable *T_1 = nullptr; | 2191 Variable *T_1 = nullptr; |
| 2229 if (Traits::Is64Bit && DestTy == IceType_i32) { | 2192 if (Traits::Is64Bit && DestTy == IceType_i32) { |
| 2230 T_1 = makeReg(IceType_i64); | 2193 T_1 = makeReg(IceType_i64); |
| 2231 } else { | 2194 } else { |
| 2232 assert(DestTy != IceType_i32); | 2195 assert(DestTy != IceType_i32); |
| 2233 T_1 = makeReg(IceType_i32); | 2196 T_1 = makeReg(IceType_i32); |
| (...skipping 15 matching lines...) | |
| 2249 if (isVectorType(DestTy)) { | 2212 if (isVectorType(DestTy)) { |
| 2250 assert(DestTy == IceType_v4f32 && | 2213 assert(DestTy == IceType_v4f32 && |
| 2251 Inst->getSrc(0)->getType() == IceType_v4i32); | 2214 Inst->getSrc(0)->getType() == IceType_v4i32); |
| 2252 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2215 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2253 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2216 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2254 Src0RM = legalizeToReg(Src0RM); | 2217 Src0RM = legalizeToReg(Src0RM); |
| 2255 Variable *T = makeReg(DestTy); | 2218 Variable *T = makeReg(DestTy); |
| 2256 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); | 2219 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); |
| 2257 _movp(Dest, T); | 2220 _movp(Dest, T); |
| 2258 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { | 2221 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { |
| 2259 // Use a helper for x86-32. | 2222 llvm::report_fatal_error("Helper call was expected"); |
| 2260 constexpr SizeT MaxSrcs = 1; | |
| 2261 InstCall *Call = | |
| 2262 makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32 | |
| 2263 : H_sitofp_i64_f64, | |
| 2264 Dest, MaxSrcs); | |
| 2265 // TODO: Call the correct compiler-rt helper function. | |
| 2266 Call->addArg(Inst->getSrc(0)); | |
| 2267 lowerCall(Call); | |
| 2268 return; | |
| 2269 } else { | 2223 } else { |
| 2270 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2224 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2271 // Sign-extend the operand. | 2225 // Sign-extend the operand. |
| 2272 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2226 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
| 2273 Variable *T_1 = nullptr; | 2227 Variable *T_1 = nullptr; |
| 2274 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { | 2228 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { |
| 2275 T_1 = makeReg(IceType_i64); | 2229 T_1 = makeReg(IceType_i64); |
| 2276 } else { | 2230 } else { |
| 2277 assert(Src0RM->getType() != IceType_i64); | 2231 assert(Src0RM->getType() != IceType_i64); |
| 2278 T_1 = makeReg(IceType_i32); | 2232 T_1 = makeReg(IceType_i32); |
| 2279 } | 2233 } |
| 2280 Variable *T_2 = makeReg(DestTy); | 2234 Variable *T_2 = makeReg(DestTy); |
| 2281 if (Src0RM->getType() == T_1->getType()) | 2235 if (Src0RM->getType() == T_1->getType()) |
| 2282 _mov(T_1, Src0RM); | 2236 _mov(T_1, Src0RM); |
| 2283 else | 2237 else |
| 2284 _movsx(T_1, Src0RM); | 2238 _movsx(T_1, Src0RM); |
| 2285 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2239 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
| 2286 _mov(Dest, T_2); | 2240 _mov(Dest, T_2); |
| 2287 } | 2241 } |
| 2288 break; | 2242 break; |
| 2289 case InstCast::Uitofp: { | 2243 case InstCast::Uitofp: { |
| 2290 Operand *Src0 = Inst->getSrc(0); | 2244 Operand *Src0 = Inst->getSrc(0); |
| 2291 if (isVectorType(Src0->getType())) { | 2245 if (isVectorType(Src0->getType())) { |
| 2292 assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32); | 2246 llvm::report_fatal_error("Helper call was expected"); |
| 2293 constexpr SizeT MaxSrcs = 1; | |
| 2294 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | |
| 2295 Call->addArg(Src0); | |
| 2296 lowerCall(Call); | |
| 2297 } else if (Src0->getType() == IceType_i64 || | 2247 } else if (Src0->getType() == IceType_i64 || |
| 2298 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { | 2248 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { |
| 2299 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on | 2249 llvm::report_fatal_error("Helper call was expected"); |
| 2300 // x86-32. | |
| 2301 constexpr SizeT MaxSrcs = 1; | |
| 2302 IceString TargetString; | |
| 2303 if (isInt32Asserting32Or64(Src0->getType())) { | |
| 2304 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32 | |
| 2305 : H_uitofp_i32_f64; | |
| 2306 } else { | |
| 2307 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32 | |
| 2308 : H_uitofp_i64_f64; | |
| 2309 } | |
| 2310 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | |
| 2311 Call->addArg(Src0); | |
| 2312 lowerCall(Call); | |
| 2313 return; | |
| 2314 } else { | 2250 } else { |
| 2315 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2251 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2316 // Zero-extend the operand. | 2252 // Zero-extend the operand. |
| 2317 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2253 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
| 2318 Variable *T_1 = nullptr; | 2254 Variable *T_1 = nullptr; |
| 2319 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { | 2255 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { |
| 2320 T_1 = makeReg(IceType_i64); | 2256 T_1 = makeReg(IceType_i64); |
| 2321 } else { | 2257 } else { |
| 2322 assert(Src0RM->getType() != IceType_i64); | 2258 assert(Src0RM->getType() != IceType_i64); |
| 2323 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); | 2259 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); |
| (...skipping 13 matching lines...) | |
| 2337 Operand *Src0 = Inst->getSrc(0); | 2273 Operand *Src0 = Inst->getSrc(0); |
| 2338 if (DestTy == Src0->getType()) { | 2274 if (DestTy == Src0->getType()) { |
| 2339 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); | 2275 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); |
| 2340 lowerAssign(Assign); | 2276 lowerAssign(Assign); |
| 2341 return; | 2277 return; |
| 2342 } | 2278 } |
| 2343 switch (DestTy) { | 2279 switch (DestTy) { |
| 2344 default: | 2280 default: |
| 2345 llvm_unreachable("Unexpected Bitcast dest type"); | 2281 llvm_unreachable("Unexpected Bitcast dest type"); |
| 2346 case IceType_i8: { | 2282 case IceType_i8: { |
| 2347 assert(Src0->getType() == IceType_v8i1); | 2283 llvm::report_fatal_error("Helper call was expected"); |
| 2348 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1); | |
| 2349 Call->addArg(Src0); | |
| 2350 lowerCall(Call); | |
| 2351 } break; | 2284 } break; |
| 2352 case IceType_i16: { | 2285 case IceType_i16: { |
| 2353 assert(Src0->getType() == IceType_v16i1); | 2286 llvm::report_fatal_error("Helper call was expected"); |
| 2354 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1); | |
| 2355 Call->addArg(Src0); | |
| 2356 lowerCall(Call); | |
| 2357 } break; | 2287 } break; |
| 2358 case IceType_i32: | 2288 case IceType_i32: |
| 2359 case IceType_f32: { | 2289 case IceType_f32: { |
| 2360 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2290 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2361 Type SrcType = Src0RM->getType(); | 2291 Type SrcType = Src0RM->getType(); |
| 2362 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || | 2292 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || |
| 2363 (DestTy == IceType_f32 && SrcType == IceType_i32)); | 2293 (DestTy == IceType_f32 && SrcType == IceType_i32)); |
| 2364 // a.i32 = bitcast b.f32 ==> | 2294 // a.i32 = bitcast b.f32 ==> |
| 2365 // t.f32 = b.f32 | 2295 // t.f32 = b.f32 |
| 2366 // s.f32 = spill t.f32 | 2296 // s.f32 = spill t.f32 |
| (...skipping 95 matching lines...) | |
| 2462 // SpillLo is considered a "use" of Spill so define Spill before it is | 2392 // SpillLo is considered a "use" of Spill so define Spill before it is |
| 2463 // used. | 2393 // used. |
| 2464 Context.insert(InstFakeDef::create(Func, Spill)); | 2394 Context.insert(InstFakeDef::create(Func, Spill)); |
| 2465 _store(T_Lo, SpillLo); | 2395 _store(T_Lo, SpillLo); |
| 2466 _mov(T_Hi, hiOperand(Src0)); | 2396 _mov(T_Hi, hiOperand(Src0)); |
| 2467 _store(T_Hi, SpillHi); | 2397 _store(T_Hi, SpillHi); |
| 2468 _movq(Dest, Spill); | 2398 _movq(Dest, Spill); |
| 2469 } | 2399 } |
| 2470 } break; | 2400 } break; |
| 2471 case IceType_v8i1: { | 2401 case IceType_v8i1: { |
| 2472 assert(Src0->getType() == IceType_i8); | 2402 llvm::report_fatal_error("Helper call was expected"); |
| 2473 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); | |
| 2474 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | |
| 2475 // Arguments to functions are required to be at least 32 bits wide. | |
| 2476 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | |
| 2477 Call->addArg(Src0AsI32); | |
| 2478 lowerCall(Call); | |
| 2479 } break; | 2403 } break; |
| 2480 case IceType_v16i1: { | 2404 case IceType_v16i1: { |
| 2481 assert(Src0->getType() == IceType_i16); | 2405 llvm::report_fatal_error("Helper call was expected"); |
| 2482 InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1); | |
| 2483 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | |
| 2484 // Arguments to functions are required to be at least 32 bits wide. | |
| 2485 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | |
| 2486 Call->addArg(Src0AsI32); | |
| 2487 lowerCall(Call); | |
| 2488 } break; | 2406 } break; |
| 2489 case IceType_v8i16: | 2407 case IceType_v8i16: |
| 2490 case IceType_v16i8: | 2408 case IceType_v16i8: |
| 2491 case IceType_v4i32: | 2409 case IceType_v4i32: |
| 2492 case IceType_v4f32: { | 2410 case IceType_v4f32: { |
| 2493 _movp(Dest, legalizeToReg(Src0)); | 2411 _movp(Dest, legalizeToReg(Src0)); |
| 2494 } break; | 2412 } break; |
| 2495 } | 2413 } |
| 2496 break; | 2414 break; |
| 2497 } | 2415 } |
| (...skipping 2661 matching lines...) | |
| 5159 Type Ty = Dest->getType(); | 5077 Type Ty = Dest->getType(); |
| 5160 Type ElementTy = typeElementType(Ty); | 5078 Type ElementTy = typeElementType(Ty); |
| 5161 SizeT NumElements = typeNumElements(Ty); | 5079 SizeT NumElements = typeNumElements(Ty); |
| 5162 | 5080 |
| 5163 Operand *T = Ctx->getConstantUndef(Ty); | 5081 Operand *T = Ctx->getConstantUndef(Ty); |
| 5164 for (SizeT I = 0; I < NumElements; ++I) { | 5082 for (SizeT I = 0; I < NumElements; ++I) { |
| 5165 Constant *Index = Ctx->getConstantInt32(I); | 5083 Constant *Index = Ctx->getConstantInt32(I); |
| 5166 | 5084 |
| 5167 // Extract the next two inputs. | 5085 // Extract the next two inputs. |
| 5168 Variable *Op0 = Func->makeVariable(ElementTy); | 5086 Variable *Op0 = Func->makeVariable(ElementTy); |
| 5169 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); | 5087 Context.insert(InstExtractElement::create(Func, Op0, Src0, Index)); |
| 5170 Variable *Op1 = Func->makeVariable(ElementTy); | 5088 Variable *Op1 = Func->makeVariable(ElementTy); |
| 5171 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); | 5089 Context.insert(InstExtractElement::create(Func, Op1, Src1, Index)); |
| 5172 | 5090 |
| 5173 // Perform the arithmetic as a scalar operation. | 5091 // Perform the arithmetic as a scalar operation. |
| 5174 Variable *Res = Func->makeVariable(ElementTy); | 5092 Variable *Res = Func->makeVariable(ElementTy); |
| 5175 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); | 5093 auto *Arith = InstArithmetic::create(Func, Kind, Res, Op0, Op1); |
| 5094 Context.insert(Arith); | |
| 5095 // We might have created an operation that needed a helper call. | |
| 5096 genTargetHelperCallFor(Arith); | |
| 5176 | 5097 |
| 5177 // Insert the result into position. | 5098 // Insert the result into position. |
| 5178 Variable *DestT = Func->makeVariable(Ty); | 5099 Variable *DestT = Func->makeVariable(Ty); |
| 5179 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); | 5100 Context.insert(InstInsertElement::create(Func, DestT, T, Res, Index)); |
| 5180 T = DestT; | 5101 T = DestT; |
| 5181 } | 5102 } |
| 5182 | 5103 |
| 5183 lowerAssign(InstAssign::create(Func, Dest, T)); | 5104 Context.insert(InstAssign::create(Func, Dest, T)); |
| 5184 } | 5105 } |
| 5185 | 5106 |
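The loop above rebuilds the vector one lane at a time: extract both inputs, apply the scalar operation, and insert the result. Its lane-wise semantics, sketched on plain arrays for clarity (illustrative only; the real routine emits Subzero instructions rather than touching memory):

```cpp
#include <cstddef>

// Lane-wise semantics reproduced by the extract/arith/insert chain above:
// every vector lane gets one independent scalar operation.
template <typename T, size_t N, typename Op>
void scalarize(T (&Dest)[N], const T (&Src0)[N], const T (&Src1)[N], Op Fn) {
  for (size_t I = 0; I < N; ++I)
    Dest[I] = Fn(Src0[I], Src1[I]);
}
```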
| 5186 /// The following pattern occurs often in lowered C and C++ code: | 5107 /// The following pattern occurs often in lowered C and C++ code: |
| 5187 /// | 5108 /// |
| 5188 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 5109 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 5189 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> | 5110 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 5190 /// | 5111 /// |
| 5191 /// We can eliminate the sext operation by copying the result of pcmpeqd, | 5112 /// We can eliminate the sext operation by copying the result of pcmpeqd, |
| 5192 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the | 5113 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the |
| 5193 /// sext operation. | 5114 /// sext operation. |
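A standalone illustration of why the copy suffices, relying only on standard SSE compare semantics rather than Subzero internals: instructions like pcmpeqd write all-ones or all-zeros into each lane, which is exactly the sign-extended form of the i1 compare result.

```cpp
#include <cstdint>

// Per-lane behavior of an SSE equality compare: the result is already
// "sext i1 to i32", so a following sext can become a plain register copy.
int32_t laneCompareEq(int32_t A, int32_t B) {
  return A == B ? -1 : 0; // -1 is 0xFFFFFFFF: all bits set
}
```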
| (...skipping 122 matching lines...) | |
| 5316 } | 5237 } |
| 5317 | 5238 |
| 5318 // Pause constant blinding or pooling; blinding or pooling will be done later | 5239 // Pause constant blinding or pooling; blinding or pooling will be done later |
| 5319 // during phi lowering assignments. | 5240 // during phi lowering assignments. |
| 5320 BoolFlagSaver B(RandomizationPoolingPaused, true); | 5241 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 5321 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( | 5242 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( |
| 5322 this, Context.getNode(), Func); | 5243 this, Context.getNode(), Func); |
| 5323 } | 5244 } |
| 5324 | 5245 |
| 5325 template <class Machine> | 5246 template <class Machine> |
| 5326 uint32_t | 5247 void TargetX86Base<Machine>::genTargetHelperCallFor(Inst *Instr) { |
| 5327 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) { | 5248 uint32_t StackArgumentsSize = 0; |
| 5249 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { | |
| 5250 const char *HelperName = nullptr; | |
| 5251 Variable *Dest = Arith->getDest(); | |
| 5252 Type DestTy = Dest->getType(); | |
| 5253 if (!Traits::Is64Bit && DestTy == IceType_i64) { | |
| 5254 switch (Arith->getOp()) { | |
| 5255 default: | |
| 5256 return; | |
| 5257 case InstArithmetic::Udiv: | |
| 5258 HelperName = H_udiv_i64; | |
| 5259 break; | |
| 5260 case InstArithmetic::Sdiv: | |
| 5261 HelperName = H_sdiv_i64; | |
| 5262 break; | |
| 5263 case InstArithmetic::Urem: | |
| 5264 HelperName = H_urem_i64; | |
| 5265 break; | |
| 5266 case InstArithmetic::Srem: | |
| 5267 HelperName = H_srem_i64; | |
| 5268 break; | |
| 5269 } | |
| 5270 } else if (isVectorType(DestTy)) { | |
| 5271 Variable *Dest = Arith->getDest(); | |
| 5272 Operand *Src0 = Arith->getSrc(0); | |
| 5273 Operand *Src1 = Arith->getSrc(1); | |
| 5274 switch (Arith->getOp()) { | |
| 5275 default: | |
| 5276 return; | |
| 5277 case InstArithmetic::Mul: | |
| 5278 if (DestTy == IceType_v16i8) { | |
| 5279 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1); | |
| 5280 Arith->setDeleted(); | |
| 5281 } | |
| 5282 return; | |
| 5283 case InstArithmetic::Shl: | |
| 5284 case InstArithmetic::Lshr: | |
| 5285 case InstArithmetic::Ashr: | |
| 5286 case InstArithmetic::Udiv: | |
| 5287 case InstArithmetic::Urem: | |
| 5288 case InstArithmetic::Sdiv: | |
| 5289 case InstArithmetic::Srem: | |
| 5290 case InstArithmetic::Frem: | |
| 5291 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1); | |
| 5292 Arith->setDeleted(); | |
| 5293 return; | |
| 5294 } | |
| 5295 } else { | |
| 5296 switch (Arith->getOp()) { | |
| 5297 default: | |
| 5298 return; | |
| 5299 case InstArithmetic::Frem: | |
| 5300 if (isFloat32Asserting32Or64(DestTy)) | |
| 5301 HelperName = H_frem_f32; | |
| 5302 else | |
| 5303 HelperName = H_frem_f64; | |
| 5304 } | |
| 5305 } | |
| 5306 constexpr SizeT MaxSrcs = 2; | |
| 5307 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
| 5308 Call->addArg(Arith->getSrc(0)); | |
| 5309 Call->addArg(Arith->getSrc(1)); | |
| 5310 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | |
| 5311 Context.insert(Call); | |
| 5312 Arith->setDeleted(); | |
| 5313 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | |
| 5314 InstCast::OpKind CastKind = Cast->getCastKind(); | |
| 5315 Operand *Src0 = Cast->getSrc(0); | |
| 5316 const Type SrcType = Src0->getType(); | |
| 5317 Variable *Dest = Cast->getDest(); | |
| 5318 const Type DestTy = Dest->getType(); | |
| 5319 const char *HelperName = nullptr; | |
| 5320 switch (CastKind) { | |
| 5321 default: | |
| 5322 return; | |
| 5323 case InstCast::Fptosi: | |
| 5324 if (!Traits::Is64Bit && DestTy == IceType_i64) { | |
| 5325 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | |
| 5326 : H_fptosi_f64_i64; | |
| 5327 } else { | |
| 5328 return; | |
| 5329 } | |
| 5330 break; | |
| 5331 case InstCast::Fptoui: | |
| 5332 if (isVectorType(DestTy)) { | |
| 5333 assert(DestTy == IceType_v4i32 && SrcType == IceType_v4f32); | |
| 5334 HelperName = H_fptoui_4xi32_f32; | |
| 5335 } else if (DestTy == IceType_i64 || | |
| 5336 (!Traits::Is64Bit && DestTy == IceType_i32)) { | |
| 5337 if (Traits::Is64Bit) { | |
| 5338 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | |
| 5339 : H_fptoui_f64_i64; | |
| 5340 } else if (isInt32Asserting32Or64(DestTy)) { | |
| 5341 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | |
| 5342 : H_fptoui_f64_i32; | |
| 5343 } else { | |
| 5344 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | |
| 5345 : H_fptoui_f64_i64; | |
| 5346 } | |
| 5347 } else { | |
| 5348 return; | |
| 5349 } | |
| 5350 break; | |
| 5351 case InstCast::Sitofp: | |
| 5352 if (!Traits::Is64Bit && SrcType == IceType_i64) { | |
| 5353 HelperName = isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32 | |
| 5354 : H_sitofp_i64_f64; | |
| 5355 } else { | |
| 5356 return; | |
| 5357 } | |
| 5358 break; | |
| 5359 case InstCast::Uitofp: | |
| 5360 if (isVectorType(SrcType)) { | |
| 5361 assert(DestTy == IceType_v4f32 && SrcType == IceType_v4i32); | |
| 5362 HelperName = H_uitofp_4xi32_4xf32; | |
| 5363 } else if (SrcType == IceType_i64 || | |
| 5364 (!Traits::Is64Bit && SrcType == IceType_i32)) { | |
| 5365 if (isInt32Asserting32Or64(SrcType)) { | |
| 5366 HelperName = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32 | |
| 5367 : H_uitofp_i32_f64; | |
| 5368 } else { | |
| 5369 HelperName = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32 | |
| 5370 : H_uitofp_i64_f64; | |
| 5371 } | |
| 5372 } else { | |
| 5373 return; | |
| 5374 } | |
| 5375 break; | |
| 5376 case InstCast::Bitcast: { | |
| 5377 if (DestTy == Src0->getType()) | |
| 5378 return; | |
| 5379 switch (DestTy) { | |
| 5380 default: | |
| 5381 return; | |
| 5382 case IceType_i8: | |
| 5383 assert(Src0->getType() == IceType_v8i1); | |
| 5384 HelperName = H_bitcast_8xi1_i8; | |
| 5385 break; | |
| 5386 case IceType_i16: | |
| 5387 assert(Src0->getType() == IceType_v16i1); | |
| 5388 HelperName = H_bitcast_16xi1_i16; | |
| 5389 break; | |
| 5390 case IceType_v8i1: { | |
| 5391 assert(Src0->getType() == IceType_i8); | |
| 5392 HelperName = H_bitcast_i8_8xi1; | |
| 5393 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | |
| 5394 // Arguments to functions are required to be at least 32 bits wide. | |
| 5395 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | |
| 5396 Src0 = Src0AsI32; | |
| 5397 } break; | |
| 5398 case IceType_v16i1: { | |
| 5399 assert(Src0->getType() == IceType_i16); | |
| 5400 HelperName = H_bitcast_i16_16xi1; | |
| 5401 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | |
| 5402 // Arguments to functions are required to be at least 32 bits wide. | |
| 5403 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | |
| 5404 Src0 = Src0AsI32; | |
| 5405 } break; | |
| 5406 } | |
| 5407 } break; | |
| 5408 } | |
| 5409 constexpr SizeT MaxSrcs = 1; | |
| 5410 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
| 5411 Call->addArg(Src0); | |
| 5412 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | |
| 5413 Context.insert(Call); | |
| 5414 Cast->setDeleted(); | |
| 5415 } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) { | |
| 5416 std::vector<Type> ArgTypes; | |
| 5417 Type ReturnType = IceType_void; | |
| 5418 switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) { | |
| 5419 default: | |
| 5420 return; | |
| 5421 case Intrinsics::Ctpop: { | |
| 5422 Operand *Val = Intrinsic->getArg(0); | |
| 5423 Type ValTy = Val->getType(); | |
| 5424 if (ValTy == IceType_i64) | |
| 5425 ArgTypes = {IceType_i64}; | |
| 5426 else | |
| 5427 ArgTypes = {IceType_i32}; | |
| 5428 ReturnType = IceType_i32; | |
| 5429 } break; | |
| 5430 case Intrinsics::Longjmp: | |
| 5431 ArgTypes = {IceType_i32, IceType_i32}; | |
| 5432 ReturnType = IceType_void; | |
| 5433 break; | |
| 5434 case Intrinsics::Memcpy: | |
| 5435 ArgTypes = {IceType_i32, IceType_i32, IceType_i32}; | |
| 5436 ReturnType = IceType_void; | |
| 5437 break; | |
| 5438 case Intrinsics::Memmove: | |
| 5439 ArgTypes = {IceType_i32, IceType_i32, IceType_i32}; | |
| 5440 ReturnType = IceType_void; | |
| 5441 break; | |
| 5442 case Intrinsics::Memset: | |
| 5443 ArgTypes = {IceType_i32, IceType_i32, IceType_i32}; | |
| 5444 ReturnType = IceType_void; | |
| 5445 break; | |
| 5446 case Intrinsics::NaClReadTP: | |
| 5447 ReturnType = IceType_i32; | |
| 5448 break; | |
| 5449 case Intrinsics::Setjmp: | |
| 5450 ArgTypes = {IceType_i32}; | |
| 5451 ReturnType = IceType_i32; | |
| 5452 break; | |
| 5453 } | |
| 5454 StackArgumentsSize = getCallStackArgumentsSizeBytes(ArgTypes, ReturnType); | |
| 5455 } else if (auto *Call = llvm::dyn_cast<InstCall>(Instr)) { | |
| 5456 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | |
| 5457 } else if (auto *Ret = llvm::dyn_cast<InstRet>(Instr)) { | |
| 5458 if (!Ret->hasRetValue()) | |
| 5459 return; | |
| 5460 Operand *RetValue = Ret->getRetValue(); | |
| 5461 Type ReturnType = RetValue->getType(); | |
| 5462 if (!isScalarFloatingType(ReturnType)) | |
| 5463 return; | |
| 5464 StackArgumentsSize = typeWidthInBytes(ReturnType); | |
| 5465 } else { | |
| 5466 return; | |
| 5467 } | |
| 5468 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize); | |
| 5469 updateMaxOutArgsSizeBytes(StackArgumentsSize); | |
| 5470 } | |
| 5471 | |
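A toy model of the two-phase contract this change introduces (every name below is invented for illustration): genTargetHelperCalls rewrites instructions that need library support into helper calls before lowering runs, so the lowering paths above may treat a surviving Frem, Udiv, or similar as a fatal error rather than building the call themselves.

```cpp
#include <stdexcept>

enum class Op { Frem, Call };

// Phase one: rewrite an op the lowering cannot handle into a helper call.
Op genHelperCallFor(Op O) { return O == Op::Frem ? Op::Call : O; }

// Phase two: lowering assumes phase one already ran.
void lower(Op O) {
  if (O == Op::Frem)
    throw std::runtime_error("Helper call was expected"); // mirrors report_fatal_error
}
```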
| 5472 template <class Machine> | |
| 5473 uint32_t TargetX86Base<Machine>::getCallStackArgumentsSizeBytes( | |
| 5474 const std::vector<Type> &ArgTypes, Type ReturnType) { | |
| 5328 uint32_t OutArgumentsSizeBytes = 0; | 5475 uint32_t OutArgumentsSizeBytes = 0; |
| 5329 uint32_t XmmArgCount = 0; | 5476 uint32_t XmmArgCount = 0; |
| 5330 uint32_t GprArgCount = 0; | 5477 uint32_t GprArgCount = 0; |
| 5331 // Classify each argument operand according to the location where the | 5478 for (Type Ty : ArgTypes) { |
| 5332 // argument is passed. | |
| 5333 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
| 5334 Operand *Arg = Instr->getArg(i); | |
| 5335 Type Ty = Arg->getType(); | |
| 5336 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 5479 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 5337 assert(typeWidthInBytes(Ty) >= 4); | 5480 assert(typeWidthInBytes(Ty) >= 4); |
| 5338 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) { | 5481 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) { |
| 5339 ++XmmArgCount; | 5482 ++XmmArgCount; |
| 5340 } else if (isScalarIntegerType(Ty) && | 5483 } else if (isScalarIntegerType(Ty) && |
| 5341 GprArgCount < Traits::X86_MAX_GPR_ARGS) { | 5484 GprArgCount < Traits::X86_MAX_GPR_ARGS) { |
| 5342 // The 64 bit ABI allows some integers to be passed in GPRs. | 5485 // The 64 bit ABI allows some integers to be passed in GPRs. |
| 5343 ++GprArgCount; | 5486 ++GprArgCount; |
| 5344 } else { | 5487 } else { |
| 5345 if (isVectorType(Arg->getType())) { | 5488 if (isVectorType(Ty)) { |
| 5346 OutArgumentsSizeBytes = | 5489 OutArgumentsSizeBytes = |
| 5347 Traits::applyStackAlignment(OutArgumentsSizeBytes); | 5490 Traits::applyStackAlignment(OutArgumentsSizeBytes); |
| 5348 } | 5491 } |
| 5349 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 5492 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 5350 } | 5493 } |
| 5351 } | 5494 } |
| 5352 if (Traits::Is64Bit) | 5495 if (Traits::Is64Bit) |
| 5353 return OutArgumentsSizeBytes; | 5496 return OutArgumentsSizeBytes; |
| 5354 // The 32 bit ABI requires floating point values to be returned on the x87 FP | 5497 // The 32 bit ABI requires floating point values to be returned on the x87 FP |
| 5355 // stack. Ensure there is enough space for the fstp/movs for floating returns. | 5498 // stack. Ensure there is enough space for the fstp/movs for floating returns. |
| 5356 Variable *Dest = Instr->getDest(); | 5499 if (ReturnType == IceType_void) |
|
> Jim Stichnoth (2015/11/26 18:32:06): This void test is redundant with the code below, c
> sehr (2015/11/26 21:09:23): Removed.
| 5357 if (Dest == nullptr) | |
| 5358 return OutArgumentsSizeBytes; | 5500 return OutArgumentsSizeBytes; |
| 5359 const Type DestType = Dest->getType(); | 5501 if (isScalarFloatingType(ReturnType)) { |
| 5360 if (isScalarFloatingType(Dest->getType())) { | |
| 5361 OutArgumentsSizeBytes = | 5502 OutArgumentsSizeBytes = |
| 5362 std::max(OutArgumentsSizeBytes, | 5503 std::max(OutArgumentsSizeBytes, |
| 5363 static_cast<uint32_t>(typeWidthInBytesOnStack(DestType))); | 5504 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType))); |
| 5364 } | 5505 } |
| 5365 return OutArgumentsSizeBytes; | 5506 return OutArgumentsSizeBytes; |
| 5366 } | 5507 } |
| 5367 | 5508 |
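A hypothetical worked example of the classification above, assuming an x86-32 target where Traits::X86_MAX_XMM_ARGS is 4, integer arguments get no GPRs, and typeWidthInBytesOnStack returns 4 for i32/f32 and 8 for f64 (all assumptions made for illustration):

```cpp
#include <algorithm>
#include <cstdint>

// Out-args size for a call like "float f(i32, <4 x float>, double)".
uint32_t sampleOutArgsSize() {
  uint32_t Bytes = 0;
  uint32_t XmmCount = 0;
  const uint32_t MaxXmm = 4; // assumed Traits::X86_MAX_XMM_ARGS
  // i32 argument: scalar integer, no GPR slots on x86-32 -> 4 stack bytes.
  Bytes += 4;
  // <4 x float> argument: XmmCount(0) < MaxXmm -> passed in a register.
  if (XmmCount < MaxXmm)
    ++XmmCount;
  // double argument: scalar float -> 8 stack bytes.
  Bytes += 8;
  // float return value comes back on the x87 stack; reserve room for it.
  Bytes = std::max(Bytes, uint32_t{4});
  return Bytes; // 12
}
```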
| 5368 template <class Machine> | 5509 template <class Machine> |
| 5510 uint32_t | |
| 5511 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) { | |
| 5512 // Build a vector of the arguments' types. | |
| 5513 std::vector<Type> ArgTypes; | |
| 5514 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
| 5515 Operand *Arg = Instr->getArg(i); | |
| 5516 ArgTypes.emplace_back(Arg->getType()); | |
| 5517 } | |
| 5518 // Compute the return type (if any). |
| 5519 Type ReturnType = IceType_void; | |
| 5520 Variable *Dest = Instr->getDest(); | |
| 5521 if (Dest != nullptr) | |
| 5522 ReturnType = Dest->getType(); | |
| 5523 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType); | |
| 5524 } | |
| 5525 | |
| 5526 template <class Machine> | |
| 5369 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) { | 5527 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) { |
| 5370 Variable *Reg = makeReg(Ty, RegNum); | 5528 Variable *Reg = makeReg(Ty, RegNum); |
| 5371 switch (Ty) { | 5529 switch (Ty) { |
| 5372 case IceType_i1: | 5530 case IceType_i1: |
| 5373 case IceType_i8: | 5531 case IceType_i8: |
| 5374 case IceType_i16: | 5532 case IceType_i16: |
| 5375 case IceType_i32: | 5533 case IceType_i32: |
| 5376 case IceType_i64: | 5534 case IceType_i64: |
| 5377 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. | 5535 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. |
| 5378 _mov(Reg, Ctx->getConstantZero(Ty)); | 5536 _mov(Reg, Ctx->getConstantZero(Ty)); |
| (...skipping 687 matching lines...) | |
| 6066 } | 6224 } |
| 6067 // the offset is not eligible for blinding or pooling, return the original | 6225 // the offset is not eligible for blinding or pooling, return the original |
| 6068 // mem operand | 6226 // mem operand |
| 6069 return MemOperand; | 6227 return MemOperand; |
| 6070 } | 6228 } |
| 6071 | 6229 |
| 6072 } // end of namespace X86Internal | 6230 } // end of namespace X86Internal |
| 6073 } // end of namespace Ice | 6231 } // end of namespace Ice |
| 6074 | 6232 |
| 6075 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6233 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |