Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1472623002: Unify alloca, outgoing arg, and prolog construction (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixed missing out args in subtraction. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 285 matching lines...) Expand 10 before | Expand all | Expand 10 after
296 } 296 }
297 297
298 template <class Machine> void TargetX86Base<Machine>::staticInit() { 298 template <class Machine> void TargetX86Base<Machine>::staticInit() {
299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); 299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs);
300 } 300 }
301 301
302 template <class Machine> void TargetX86Base<Machine>::translateO2() { 302 template <class Machine> void TargetX86Base<Machine>::translateO2() {
303 TimerMarker T(TimerStack::TT_O2, Func); 303 TimerMarker T(TimerStack::TT_O2, Func);
304 304
305 genTargetHelperCalls(); 305 genTargetHelperCalls();
306 Func->dump("After target helper call insertion");
306 307
307 // Merge Alloca instructions, and lay out the stack. 308 // Merge Alloca instructions, and lay out the stack.
308 static constexpr bool SortAndCombineAllocas = true; 309 static constexpr bool SortAndCombineAllocas = true;
309 Func->processAllocas(SortAndCombineAllocas); 310 Func->processAllocas(SortAndCombineAllocas);
310 Func->dump("After Alloca processing"); 311 Func->dump("After Alloca processing");
311 312
312 if (!Ctx->getFlags().getPhiEdgeSplit()) { 313 if (!Ctx->getFlags().getPhiEdgeSplit()) {
313 // Lower Phi instructions. 314 // Lower Phi instructions.
314 Func->placePhiLoads(); 315 Func->placePhiLoads();
315 if (Func->hasError()) 316 if (Func->hasError())
(...skipping 635 matching lines...) Expand 10 before | Expand all | Expand 10 after
951 952
952 template <class Machine> 953 template <class Machine>
953 llvm::SmallBitVector 954 llvm::SmallBitVector
954 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, 955 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
955 RegSetMask Exclude) const { 956 RegSetMask Exclude) const {
956 return Traits::getRegisterSet(Include, Exclude); 957 return Traits::getRegisterSet(Include, Exclude);
957 } 958 }
958 959
959 template <class Machine> 960 template <class Machine>
960 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { 961 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
961 if (!Inst->getKnownFrameOffset())
962 setHasFramePointer();
963 // Conservatively require the stack to be aligned. Some stack adjustment 962 // Conservatively require the stack to be aligned. Some stack adjustment
964 // operations implemented below assume that the stack is aligned before the 963 // operations implemented below assume that the stack is aligned before the
965 // alloca. All the alloca code ensures that the stack alignment is preserved 964 // alloca. All the alloca code ensures that the stack alignment is preserved
966 // after the alloca. The stack alignment restriction can be relaxed in some 965 // after the alloca. The stack alignment restriction can be relaxed in some
967 // cases. 966 // cases.
968 NeedsStackAlignment = true; 967 NeedsStackAlignment = true;
969 968
970 // TODO(stichnot): minimize the number of adjustments of esp, etc.
971 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
972 Operand *TotalSize = legalize(Inst->getSizeInBytes());
973 Variable *Dest = Inst->getDest();
974 uint32_t AlignmentParam = Inst->getAlignInBytes();
975 // For default align=0, set it to the real value 1, to avoid any 969 // For default align=0, set it to the real value 1, to avoid any
976 // bit-manipulation problems below. 970 // bit-manipulation problems below.
977 AlignmentParam = std::max(AlignmentParam, 1u); 971 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes());
978 972
979 // LLVM enforces power of 2 alignment. 973 // LLVM enforces power of 2 alignment.
980 assert(llvm::isPowerOf2_32(AlignmentParam)); 974 assert(llvm::isPowerOf2_32(AlignmentParam));
981 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); 975 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
982 976
983 uint32_t Alignment = 977 const uint32_t Alignment =
984 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); 978 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
985 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { 979 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;
980 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
981 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();
982 const bool UseFramePointer =
983 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
984
985 if (UseFramePointer)
986 setHasFramePointer(); 986 setHasFramePointer();
987
988 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
989 if (OverAligned) {
987 _and(esp, Ctx->getConstantInt32(-Alignment)); 990 _and(esp, Ctx->getConstantInt32(-Alignment));
988 } 991 }
992
993 Variable *Dest = Inst->getDest();
994 Operand *TotalSize = legalize(Inst->getSizeInBytes());
995
989 if (const auto *ConstantTotalSize = 996 if (const auto *ConstantTotalSize =
990 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 997 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
991 uint32_t Value = ConstantTotalSize->getValue(); 998 const uint32_t Value =
992 Value = Utils::applyAlignment(Value, Alignment); 999 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
993 if (Inst->getKnownFrameOffset()) { 1000 if (!UseFramePointer) {
994 _adjust_stack(Value); 1001 // If we don't need a Frame Pointer, this alloca has a known offset to the
1002 // stack pointer. We don't need to adjust the stack pointer, nor assign any
1003 // value to Dest, as Dest is rematerializable.
1004 assert(Dest->isRematerializable());
995 FixedAllocaSizeBytes += Value; 1005 FixedAllocaSizeBytes += Value;
1006 Context.insert(InstFakeDef::create(Func, Dest));
996 } else { 1007 } else {
997 _sub(esp, Ctx->getConstantInt32(Value)); 1008 _sub(esp, Ctx->getConstantInt32(Value));
998 } 1009 }
999 } else { 1010 } else {
1000 // Non-constant sizes need to be adjusted to the next highest multiple of 1011 // Non-constant sizes need to be adjusted to the next highest multiple of
1001 // the required alignment at runtime. 1012 // the required alignment at runtime.
1002 Variable *T = makeReg(IceType_i32); 1013 Variable *T = makeReg(IceType_i32);
1003 _mov(T, TotalSize); 1014 _mov(T, TotalSize);
1004 _add(T, Ctx->getConstantInt32(Alignment - 1)); 1015 _add(T, Ctx->getConstantInt32(Alignment - 1));
1005 _and(T, Ctx->getConstantInt32(-Alignment)); 1016 _and(T, Ctx->getConstantInt32(-Alignment));
1006 _sub(esp, T); 1017 _sub(esp, T);
1007 } 1018 }
1008 _mov(Dest, esp); 1019 // Add enough to the returned address to account for the out args area.
1020 uint32_t OutArgsSize = maxOutArgsSizeBytes();
1021 if (OutArgsSize > 0) {
1022 Variable *T = makeReg(IceType_i32);
1023 typename Traits::X86OperandMem *CalculateOperand =
1024 Traits::X86OperandMem::create(
1025 Func, IceType_i32, esp,
1026 Ctx->getConstantInt(IceType_i32, OutArgsSize));
1027 _lea(T, CalculateOperand);
1028 _mov(Dest, T);
1029 } else {
1030 _mov(Dest, esp);
1031 }
1009 } 1032 }
1010 1033
1011 /// Strength-reduce scalar integer multiplication by a constant (for i32 or 1034 /// Strength-reduce scalar integer multiplication by a constant (for i32 or
1012 /// narrower) for certain constants. The lea instruction can be used to multiply 1035 /// narrower) for certain constants. The lea instruction can be used to multiply
1013 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of 1036 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of
1014 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 1037 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
1015 /// lea-based multiplies by 5, combined with left-shifting by 2. 1038 /// lea-based multiplies by 5, combined with left-shifting by 2.
1016 template <class Machine> 1039 template <class Machine>
1017 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, 1040 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1018 int32_t Src1) { 1041 int32_t Src1) {
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after
1348 (void)SwapCount; 1371 (void)SwapCount;
1349 } 1372 }
1350 if (!Traits::Is64Bit && Ty == IceType_i64) { 1373 if (!Traits::Is64Bit && Ty == IceType_i64) {
1351 // These x86-32 helper-call-involved instructions are lowered in this 1374 // These x86-32 helper-call-involved instructions are lowered in this
1352 // separate switch. This is because loOperand() and hiOperand() may insert 1375 // separate switch. This is because loOperand() and hiOperand() may insert
1353 // redundant instructions for constant blinding and pooling. Such redundant 1376 // redundant instructions for constant blinding and pooling. Such redundant
1354 // instructions will fail liveness analysis under -Om1 setting. And, 1377 // instructions will fail liveness analysis under -Om1 setting. And,
1355 // actually these arguments do not need to be processed with loOperand() 1378 // actually these arguments do not need to be processed with loOperand()
1356 // and hiOperand() to be used. 1379 // and hiOperand() to be used.
1357 switch (Inst->getOp()) { 1380 switch (Inst->getOp()) {
1358 case InstArithmetic::Udiv: { 1381 case InstArithmetic::Udiv:
1359 constexpr SizeT MaxSrcs = 2; 1382 case InstArithmetic::Sdiv:
1360 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1383 case InstArithmetic::Urem:
1361 Call->addArg(Inst->getSrc(0)); 1384 case InstArithmetic::Srem:
1362 Call->addArg(Inst->getSrc(1)); 1385 llvm::report_fatal_error("Helper call was expected");
1363 lowerCall(Call);
1364 return; 1386 return;
1365 }
1366 case InstArithmetic::Sdiv: {
1367 constexpr SizeT MaxSrcs = 2;
1368 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
1369 Call->addArg(Inst->getSrc(0));
1370 Call->addArg(Inst->getSrc(1));
1371 lowerCall(Call);
1372 return;
1373 }
1374 case InstArithmetic::Urem: {
1375 constexpr SizeT MaxSrcs = 2;
1376 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
1377 Call->addArg(Inst->getSrc(0));
1378 Call->addArg(Inst->getSrc(1));
1379 lowerCall(Call);
1380 return;
1381 }
1382 case InstArithmetic::Srem: {
1383 constexpr SizeT MaxSrcs = 2;
1384 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
1385 Call->addArg(Inst->getSrc(0));
1386 Call->addArg(Inst->getSrc(1));
1387 lowerCall(Call);
1388 return;
1389 }
1390 default: 1387 default:
1391 break; 1388 break;
1392 } 1389 }
1393 1390
1394 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1391 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1395 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1392 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1396 Operand *Src0Lo = loOperand(Src0); 1393 Operand *Src0Lo = loOperand(Src0);
1397 Operand *Src0Hi = hiOperand(Src0); 1394 Operand *Src0Hi = hiOperand(Src0);
1398 Operand *Src1Lo = loOperand(Src1); 1395 Operand *Src1Lo = loOperand(Src1);
1399 Operand *Src1Hi = hiOperand(Src1); 1396 Operand *Src1Hi = hiOperand(Src1);
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after
1574 Variable *T4 = makeReg(IceType_v4i32); 1571 Variable *T4 = makeReg(IceType_v4i32);
1575 _movp(T1, Src0); 1572 _movp(T1, Src0);
1576 _pshufd(T2, Src0, Mask1030); 1573 _pshufd(T2, Src0, Mask1030);
1577 _pshufd(T3, Src1, Mask1030); 1574 _pshufd(T3, Src1, Mask1030);
1578 _pmuludq(T1, Src1); 1575 _pmuludq(T1, Src1);
1579 _pmuludq(T2, T3); 1576 _pmuludq(T2, T3);
1580 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); 1577 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1581 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); 1578 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
1582 _movp(Dest, T4); 1579 _movp(Dest, T4);
1583 } else if (Ty == IceType_v16i8) { 1580 } else if (Ty == IceType_v16i8) {
1584 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1581 llvm::report_fatal_error("Scalarized operation was expected");
1585 } else { 1582 } else {
1586 llvm::report_fatal_error("Invalid vector multiply type"); 1583 llvm::report_fatal_error("Invalid vector multiply type");
1587 } 1584 }
1588 } break; 1585 } break;
1589 case InstArithmetic::Shl: 1586 case InstArithmetic::Shl:
1590 case InstArithmetic::Lshr: 1587 case InstArithmetic::Lshr:
1591 case InstArithmetic::Ashr: 1588 case InstArithmetic::Ashr:
1592 case InstArithmetic::Udiv: 1589 case InstArithmetic::Udiv:
1593 case InstArithmetic::Urem: 1590 case InstArithmetic::Urem:
1594 case InstArithmetic::Sdiv: 1591 case InstArithmetic::Sdiv:
1595 case InstArithmetic::Srem: 1592 case InstArithmetic::Srem:
1596 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1593 llvm::report_fatal_error("Scalarized operation was expected");
1597 break; 1594 break;
1598 case InstArithmetic::Fadd: { 1595 case InstArithmetic::Fadd: {
1599 Variable *T = makeReg(Ty); 1596 Variable *T = makeReg(Ty);
1600 _movp(T, Src0); 1597 _movp(T, Src0);
1601 _addps(T, Src1); 1598 _addps(T, Src1);
1602 _movp(Dest, T); 1599 _movp(Dest, T);
1603 } break; 1600 } break;
1604 case InstArithmetic::Fsub: { 1601 case InstArithmetic::Fsub: {
1605 Variable *T = makeReg(Ty); 1602 Variable *T = makeReg(Ty);
1606 _movp(T, Src0); 1603 _movp(T, Src0);
1607 _subps(T, Src1); 1604 _subps(T, Src1);
1608 _movp(Dest, T); 1605 _movp(Dest, T);
1609 } break; 1606 } break;
1610 case InstArithmetic::Fmul: { 1607 case InstArithmetic::Fmul: {
1611 Variable *T = makeReg(Ty); 1608 Variable *T = makeReg(Ty);
1612 _movp(T, Src0); 1609 _movp(T, Src0);
1613 _mulps(T, Src0 == Src1 ? T : Src1); 1610 _mulps(T, Src0 == Src1 ? T : Src1);
1614 _movp(Dest, T); 1611 _movp(Dest, T);
1615 } break; 1612 } break;
1616 case InstArithmetic::Fdiv: { 1613 case InstArithmetic::Fdiv: {
1617 Variable *T = makeReg(Ty); 1614 Variable *T = makeReg(Ty);
1618 _movp(T, Src0); 1615 _movp(T, Src0);
1619 _divps(T, Src1); 1616 _divps(T, Src1);
1620 _movp(Dest, T); 1617 _movp(Dest, T);
1621 } break; 1618 } break;
1622 case InstArithmetic::Frem: 1619 case InstArithmetic::Frem:
1623 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1620 llvm::report_fatal_error("Scalarized operation was expected");
1624 break; 1621 break;
1625 } 1622 }
1626 return; 1623 return;
1627 } 1624 }
1628 Variable *T_edx = nullptr; 1625 Variable *T_edx = nullptr;
1629 Variable *T = nullptr; 1626 Variable *T = nullptr;
1630 switch (Inst->getOp()) { 1627 switch (Inst->getOp()) {
1631 case InstArithmetic::_num: 1628 case InstArithmetic::_num:
1632 llvm_unreachable("Unknown arithmetic operator"); 1629 llvm_unreachable("Unknown arithmetic operator");
1633 break; 1630 break;
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after
1884 case InstArithmetic::Fmul: 1881 case InstArithmetic::Fmul:
1885 _mov(T, Src0); 1882 _mov(T, Src0);
1886 _mulss(T, Src0 == Src1 ? T : Src1); 1883 _mulss(T, Src0 == Src1 ? T : Src1);
1887 _mov(Dest, T); 1884 _mov(Dest, T);
1888 break; 1885 break;
1889 case InstArithmetic::Fdiv: 1886 case InstArithmetic::Fdiv:
1890 _mov(T, Src0); 1887 _mov(T, Src0);
1891 _divss(T, Src1); 1888 _divss(T, Src1);
1892 _mov(Dest, T); 1889 _mov(Dest, T);
1893 break; 1890 break;
1894 case InstArithmetic::Frem: { 1891 case InstArithmetic::Frem:
1895 constexpr SizeT MaxSrcs = 2; 1892 llvm::report_fatal_error("Helper call was expected");
1896 InstCall *Call = makeHelperCall( 1893 break;
1897 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1898 Call->addArg(Src0);
1899 Call->addArg(Src1);
1900 return lowerCall(Call);
1901 }
1902 } 1894 }
1903 } 1895 }
1904 1896
1905 template <class Machine> 1897 template <class Machine>
1906 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { 1898 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
1907 Variable *Dest = Inst->getDest(); 1899 Variable *Dest = Inst->getDest();
1908 if (Dest->isRematerializable()) { 1900 if (Dest->isRematerializable()) {
1909 Context.insert(InstFakeDef::create(Func, Dest)); 1901 Context.insert(InstFakeDef::create(Func, Dest));
1910 return; 1902 return;
1911 } 1903 }
(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after
2154 if (isVectorType(DestTy)) { 2146 if (isVectorType(DestTy)) {
2155 assert(DestTy == IceType_v4i32 && 2147 assert(DestTy == IceType_v4i32 &&
2156 Inst->getSrc(0)->getType() == IceType_v4f32); 2148 Inst->getSrc(0)->getType() == IceType_v4f32);
2157 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2149 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2158 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2150 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2159 Src0RM = legalizeToReg(Src0RM); 2151 Src0RM = legalizeToReg(Src0RM);
2160 Variable *T = makeReg(DestTy); 2152 Variable *T = makeReg(DestTy);
2161 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2153 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2162 _movp(Dest, T); 2154 _movp(Dest, T);
2163 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { 2155 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2164 constexpr SizeT MaxSrcs = 1; 2156 llvm::report_fatal_error("Helper call was expected");
2165 Type SrcType = Inst->getSrc(0)->getType();
2166 InstCall *Call =
2167 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2168 : H_fptosi_f64_i64,
2169 Dest, MaxSrcs);
2170 Call->addArg(Inst->getSrc(0));
2171 lowerCall(Call);
2172 } else { 2157 } else {
2173 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2158 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2174 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2159 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2175 Variable *T_1 = nullptr; 2160 Variable *T_1 = nullptr;
2176 if (Traits::Is64Bit && DestTy == IceType_i64) { 2161 if (Traits::Is64Bit && DestTy == IceType_i64) {
2177 T_1 = makeReg(IceType_i64); 2162 T_1 = makeReg(IceType_i64);
2178 } else { 2163 } else {
2179 assert(DestTy != IceType_i64); 2164 assert(DestTy != IceType_i64);
2180 T_1 = makeReg(IceType_i32); 2165 T_1 = makeReg(IceType_i32);
2181 } 2166 }
2182 // cvt() requires its integer argument to be a GPR. 2167 // cvt() requires its integer argument to be a GPR.
2183 Variable *T_2 = makeReg(DestTy); 2168 Variable *T_2 = makeReg(DestTy);
2184 if (isByteSizedType(DestTy)) { 2169 if (isByteSizedType(DestTy)) {
2185 assert(T_1->getType() == IceType_i32); 2170 assert(T_1->getType() == IceType_i32);
2186 T_1->setRegClass(RCX86_Is32To8); 2171 T_1->setRegClass(RCX86_Is32To8);
2187 T_2->setRegClass(RCX86_IsTrunc8Rcvr); 2172 T_2->setRegClass(RCX86_IsTrunc8Rcvr);
2188 } 2173 }
2189 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2174 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2190 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2175 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2191 if (DestTy == IceType_i1) 2176 if (DestTy == IceType_i1)
2192 _and(T_2, Ctx->getConstantInt1(1)); 2177 _and(T_2, Ctx->getConstantInt1(1));
2193 _mov(Dest, T_2); 2178 _mov(Dest, T_2);
2194 } 2179 }
2195 break; 2180 break;
2196 case InstCast::Fptoui: 2181 case InstCast::Fptoui:
2197 if (isVectorType(DestTy)) { 2182 if (isVectorType(DestTy)) {
2198 assert(DestTy == IceType_v4i32 && 2183 llvm::report_fatal_error("Helper call was expected");
2199 Inst->getSrc(0)->getType() == IceType_v4f32);
2200 constexpr SizeT MaxSrcs = 1;
2201 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2202 Call->addArg(Inst->getSrc(0));
2203 lowerCall(Call);
2204 } else if (DestTy == IceType_i64 || 2184 } else if (DestTy == IceType_i64 ||
2205 (!Traits::Is64Bit && DestTy == IceType_i32)) { 2185 (!Traits::Is64Bit && DestTy == IceType_i32)) {
2206 // Use a helper for both x86-32 and x86-64. 2186 llvm::report_fatal_error("Helper call was expected");
2207 constexpr SizeT MaxSrcs = 1;
2208 Type SrcType = Inst->getSrc(0)->getType();
2209 IceString TargetString;
2210 if (Traits::Is64Bit) {
2211 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2212 : H_fptoui_f64_i64;
2213 } else if (isInt32Asserting32Or64(DestTy)) {
2214 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2215 : H_fptoui_f64_i32;
2216 } else {
2217 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2218 : H_fptoui_f64_i64;
2219 }
2220 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2221 Call->addArg(Inst->getSrc(0));
2222 lowerCall(Call);
2223 return;
2224 } else { 2187 } else {
2225 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2188 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2226 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2189 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2227 assert(DestTy != IceType_i64); 2190 assert(DestTy != IceType_i64);
2228 Variable *T_1 = nullptr; 2191 Variable *T_1 = nullptr;
2229 if (Traits::Is64Bit && DestTy == IceType_i32) { 2192 if (Traits::Is64Bit && DestTy == IceType_i32) {
2230 T_1 = makeReg(IceType_i64); 2193 T_1 = makeReg(IceType_i64);
2231 } else { 2194 } else {
2232 assert(DestTy != IceType_i32); 2195 assert(DestTy != IceType_i32);
2233 T_1 = makeReg(IceType_i32); 2196 T_1 = makeReg(IceType_i32);
(...skipping 15 matching lines...) Expand all
2249 if (isVectorType(DestTy)) { 2212 if (isVectorType(DestTy)) {
2250 assert(DestTy == IceType_v4f32 && 2213 assert(DestTy == IceType_v4f32 &&
2251 Inst->getSrc(0)->getType() == IceType_v4i32); 2214 Inst->getSrc(0)->getType() == IceType_v4i32);
2252 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2215 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2253 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2216 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2254 Src0RM = legalizeToReg(Src0RM); 2217 Src0RM = legalizeToReg(Src0RM);
2255 Variable *T = makeReg(DestTy); 2218 Variable *T = makeReg(DestTy);
2256 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2219 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2257 _movp(Dest, T); 2220 _movp(Dest, T);
2258 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { 2221 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2259 // Use a helper for x86-32. 2222 llvm::report_fatal_error("Helper call was expected");
2260 constexpr SizeT MaxSrcs = 1;
2261 InstCall *Call =
2262 makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32
2263 : H_sitofp_i64_f64,
2264 Dest, MaxSrcs);
2265 // TODO: Call the correct compiler-rt helper function.
2266 Call->addArg(Inst->getSrc(0));
2267 lowerCall(Call);
2268 return;
2269 } else { 2223 } else {
2270 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2224 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2271 // Sign-extend the operand. 2225 // Sign-extend the operand.
2272 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2226 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2273 Variable *T_1 = nullptr; 2227 Variable *T_1 = nullptr;
2274 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { 2228 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
2275 T_1 = makeReg(IceType_i64); 2229 T_1 = makeReg(IceType_i64);
2276 } else { 2230 } else {
2277 assert(Src0RM->getType() != IceType_i64); 2231 assert(Src0RM->getType() != IceType_i64);
2278 T_1 = makeReg(IceType_i32); 2232 T_1 = makeReg(IceType_i32);
2279 } 2233 }
2280 Variable *T_2 = makeReg(DestTy); 2234 Variable *T_2 = makeReg(DestTy);
2281 if (Src0RM->getType() == T_1->getType()) 2235 if (Src0RM->getType() == T_1->getType())
2282 _mov(T_1, Src0RM); 2236 _mov(T_1, Src0RM);
2283 else 2237 else
2284 _movsx(T_1, Src0RM); 2238 _movsx(T_1, Src0RM);
2285 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2239 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2286 _mov(Dest, T_2); 2240 _mov(Dest, T_2);
2287 } 2241 }
2288 break; 2242 break;
2289 case InstCast::Uitofp: { 2243 case InstCast::Uitofp: {
2290 Operand *Src0 = Inst->getSrc(0); 2244 Operand *Src0 = Inst->getSrc(0);
2291 if (isVectorType(Src0->getType())) { 2245 if (isVectorType(Src0->getType())) {
2292 assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32); 2246 llvm::report_fatal_error("Helper call was expected");
2293 constexpr SizeT MaxSrcs = 1;
2294 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2295 Call->addArg(Src0);
2296 lowerCall(Call);
2297 } else if (Src0->getType() == IceType_i64 || 2247 } else if (Src0->getType() == IceType_i64 ||
2298 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { 2248 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2299 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on 2249 llvm::report_fatal_error("Helper call was expected");
2300 // x86-32.
2301 constexpr SizeT MaxSrcs = 1;
2302 IceString TargetString;
2303 if (isInt32Asserting32Or64(Src0->getType())) {
2304 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32
2305 : H_uitofp_i32_f64;
2306 } else {
2307 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32
2308 : H_uitofp_i64_f64;
2309 }
2310 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2311 Call->addArg(Src0);
2312 lowerCall(Call);
2313 return;
2314 } else { 2250 } else {
2315 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2251 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2316 // Zero-extend the operand. 2252 // Zero-extend the operand.
2317 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2253 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2318 Variable *T_1 = nullptr; 2254 Variable *T_1 = nullptr;
2319 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { 2255 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
2320 T_1 = makeReg(IceType_i64); 2256 T_1 = makeReg(IceType_i64);
2321 } else { 2257 } else {
2322 assert(Src0RM->getType() != IceType_i64); 2258 assert(Src0RM->getType() != IceType_i64);
2323 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); 2259 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
(...skipping 13 matching lines...) Expand all
2337 Operand *Src0 = Inst->getSrc(0); 2273 Operand *Src0 = Inst->getSrc(0);
2338 if (DestTy == Src0->getType()) { 2274 if (DestTy == Src0->getType()) {
2339 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); 2275 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
2340 lowerAssign(Assign); 2276 lowerAssign(Assign);
2341 return; 2277 return;
2342 } 2278 }
2343 switch (DestTy) { 2279 switch (DestTy) {
2344 default: 2280 default:
2345 llvm_unreachable("Unexpected Bitcast dest type"); 2281 llvm_unreachable("Unexpected Bitcast dest type");
2346 case IceType_i8: { 2282 case IceType_i8: {
2347 assert(Src0->getType() == IceType_v8i1); 2283 llvm::report_fatal_error("Helper call was expected");
2348 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
2349 Call->addArg(Src0);
2350 lowerCall(Call);
2351 } break; 2284 } break;
2352 case IceType_i16: { 2285 case IceType_i16: {
2353 assert(Src0->getType() == IceType_v16i1); 2286 llvm::report_fatal_error("Helper call was expected");
2354 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
2355 Call->addArg(Src0);
2356 lowerCall(Call);
2357 } break; 2287 } break;
2358 case IceType_i32: 2288 case IceType_i32:
2359 case IceType_f32: { 2289 case IceType_f32: {
2360 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2290 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2361 Type SrcType = Src0RM->getType(); 2291 Type SrcType = Src0RM->getType();
2362 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || 2292 assert((DestTy == IceType_i32 && SrcType == IceType_f32) ||
2363 (DestTy == IceType_f32 && SrcType == IceType_i32)); 2293 (DestTy == IceType_f32 && SrcType == IceType_i32));
2364 // a.i32 = bitcast b.f32 ==> 2294 // a.i32 = bitcast b.f32 ==>
2365 // t.f32 = b.f32 2295 // t.f32 = b.f32
2366 // s.f32 = spill t.f32 2296 // s.f32 = spill t.f32
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
2462 // SpillLo is considered a "use" of Spill so define Spill before it is 2392 // SpillLo is considered a "use" of Spill so define Spill before it is
2463 // used. 2393 // used.
2464 Context.insert(InstFakeDef::create(Func, Spill)); 2394 Context.insert(InstFakeDef::create(Func, Spill));
2465 _store(T_Lo, SpillLo); 2395 _store(T_Lo, SpillLo);
2466 _mov(T_Hi, hiOperand(Src0)); 2396 _mov(T_Hi, hiOperand(Src0));
2467 _store(T_Hi, SpillHi); 2397 _store(T_Hi, SpillHi);
2468 _movq(Dest, Spill); 2398 _movq(Dest, Spill);
2469 } 2399 }
2470 } break; 2400 } break;
2471 case IceType_v8i1: { 2401 case IceType_v8i1: {
2472 assert(Src0->getType() == IceType_i8); 2402 llvm::report_fatal_error("Helper call was expected");
2473 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
2474 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
2475 // Arguments to functions are required to be at least 32 bits wide.
2476 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2477 Call->addArg(Src0AsI32);
2478 lowerCall(Call);
2479 } break; 2403 } break;
2480 case IceType_v16i1: { 2404 case IceType_v16i1: {
2481 assert(Src0->getType() == IceType_i16); 2405 llvm::report_fatal_error("Helper call was expected");
2482 InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
2483 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
2484 // Arguments to functions are required to be at least 32 bits wide.
2485 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2486 Call->addArg(Src0AsI32);
2487 lowerCall(Call);
2488 } break; 2406 } break;
2489 case IceType_v8i16: 2407 case IceType_v8i16:
2490 case IceType_v16i8: 2408 case IceType_v16i8:
2491 case IceType_v4i32: 2409 case IceType_v4i32:
2492 case IceType_v4f32: { 2410 case IceType_v4f32: {
2493 _movp(Dest, legalizeToReg(Src0)); 2411 _movp(Dest, legalizeToReg(Src0));
2494 } break; 2412 } break;
2495 } 2413 }
2496 break; 2414 break;
2497 } 2415 }
(...skipping 2661 matching lines...) Expand 10 before | Expand all | Expand 10 after
5159 Type Ty = Dest->getType(); 5077 Type Ty = Dest->getType();
5160 Type ElementTy = typeElementType(Ty); 5078 Type ElementTy = typeElementType(Ty);
5161 SizeT NumElements = typeNumElements(Ty); 5079 SizeT NumElements = typeNumElements(Ty);
5162 5080
5163 Operand *T = Ctx->getConstantUndef(Ty); 5081 Operand *T = Ctx->getConstantUndef(Ty);
5164 for (SizeT I = 0; I < NumElements; ++I) { 5082 for (SizeT I = 0; I < NumElements; ++I) {
5165 Constant *Index = Ctx->getConstantInt32(I); 5083 Constant *Index = Ctx->getConstantInt32(I);
5166 5084
5167 // Extract the next two inputs. 5085 // Extract the next two inputs.
5168 Variable *Op0 = Func->makeVariable(ElementTy); 5086 Variable *Op0 = Func->makeVariable(ElementTy);
5169 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); 5087 Context.insert(InstExtractElement::create(Func, Op0, Src0, Index));
5170 Variable *Op1 = Func->makeVariable(ElementTy); 5088 Variable *Op1 = Func->makeVariable(ElementTy);
5171 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); 5089 Context.insert(InstExtractElement::create(Func, Op1, Src1, Index));
5172 5090
5173 // Perform the arithmetic as a scalar operation. 5091 // Perform the arithmetic as a scalar operation.
5174 Variable *Res = Func->makeVariable(ElementTy); 5092 Variable *Res = Func->makeVariable(ElementTy);
5175 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); 5093 auto *Arith = InstArithmetic::create(Func, Kind, Res, Op0, Op1);
5094 Context.insert(Arith);
5095 // We might have created an operation that needed a helper call.
5096 genTargetHelperCallFor(Arith);
5176 5097
5177 // Insert the result into position. 5098 // Insert the result into position.
5178 Variable *DestT = Func->makeVariable(Ty); 5099 Variable *DestT = Func->makeVariable(Ty);
5179 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); 5100 Context.insert(InstInsertElement::create(Func, DestT, T, Res, Index));
5180 T = DestT; 5101 T = DestT;
5181 } 5102 }
5182 5103
5183 lowerAssign(InstAssign::create(Func, Dest, T)); 5104 Context.insert(InstAssign::create(Func, Dest, T));
5184 } 5105 }
5185 5106
5186 /// The following pattern occurs often in lowered C and C++ code: 5107 /// The following pattern occurs often in lowered C and C++ code:
5187 /// 5108 ///
5188 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 5109 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
5189 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> 5110 /// %cmp.ext = sext <n x i1> %cmp to <n x ty>
5190 /// 5111 ///
5191 /// We can eliminate the sext operation by copying the result of pcmpeqd, 5112 /// We can eliminate the sext operation by copying the result of pcmpeqd,
5192 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the 5113 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
5193 /// sext operation. 5114 /// sext operation.
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
5316 } 5237 }
5317 5238
5318 // Pause constant blinding or pooling; blinding or pooling will be done later 5239 // Pause constant blinding or pooling; blinding or pooling will be done later
5319 // during phi lowering assignments. 5240 // during phi lowering assignments.
5320 BoolFlagSaver B(RandomizationPoolingPaused, true); 5241 BoolFlagSaver B(RandomizationPoolingPaused, true);
5321 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( 5242 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
5322 this, Context.getNode(), Func); 5243 this, Context.getNode(), Func);
5323 } 5244 }
5324 5245
5325 template <class Machine> 5246 template <class Machine>
5326 uint32_t 5247 void TargetX86Base<Machine>::genTargetHelperCallFor(Inst *Instr) {
5327 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) { 5248 uint32_t StackArgumentsSize = 0;
5249 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5250 const char *HelperName = nullptr;
5251 Variable *Dest = Arith->getDest();
5252 Type DestTy = Dest->getType();
5253 if (!Traits::Is64Bit && DestTy == IceType_i64) {
5254 switch (Arith->getOp()) {
5255 default:
5256 return;
5257 case InstArithmetic::Udiv:
5258 HelperName = H_udiv_i64;
5259 break;
5260 case InstArithmetic::Sdiv:
5261 HelperName = H_sdiv_i64;
5262 break;
5263 case InstArithmetic::Urem:
5264 HelperName = H_urem_i64;
5265 break;
5266 case InstArithmetic::Srem:
5267 HelperName = H_srem_i64;
5268 break;
5269 }
5270 } else if (isVectorType(DestTy)) {
5271 Variable *Dest = Arith->getDest();
5272 Operand *Src0 = Arith->getSrc(0);
5273 Operand *Src1 = Arith->getSrc(1);
5274 switch (Arith->getOp()) {
5275 default:
5276 return;
5277 case InstArithmetic::Mul:
5278 if (DestTy == IceType_v16i8) {
5279 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
5280 Arith->setDeleted();
5281 }
5282 return;
5283 case InstArithmetic::Shl:
5284 case InstArithmetic::Lshr:
5285 case InstArithmetic::Ashr:
5286 case InstArithmetic::Udiv:
5287 case InstArithmetic::Urem:
5288 case InstArithmetic::Sdiv:
5289 case InstArithmetic::Srem:
5290 case InstArithmetic::Frem:
5291 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
5292 Arith->setDeleted();
5293 return;
5294 }
5295 } else {
5296 switch (Arith->getOp()) {
5297 default:
5298 return;
5299 case InstArithmetic::Frem:
5300 if (isFloat32Asserting32Or64(DestTy))
5301 HelperName = H_frem_f32;
5302 else
5303 HelperName = H_frem_f64;
5304 }
5305 }
5306 constexpr SizeT MaxSrcs = 2;
5307 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
5308 Call->addArg(Arith->getSrc(0));
5309 Call->addArg(Arith->getSrc(1));
5310 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
5311 Context.insert(Call);
5312 Arith->setDeleted();
5313 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
5314 InstCast::OpKind CastKind = Cast->getCastKind();
5315 Operand *Src0 = Cast->getSrc(0);
5316 const Type SrcType = Src0->getType();
5317 Variable *Dest = Cast->getDest();
5318 const Type DestTy = Dest->getType();
5319 const char *HelperName = nullptr;
5320 switch (CastKind) {
5321 default:
5322 return;
5323 case InstCast::Fptosi:
5324 if (!Traits::Is64Bit && DestTy == IceType_i64) {
5325 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
5326 : H_fptosi_f64_i64;
5327 } else {
5328 return;
5329 }
5330 break;
5331 case InstCast::Fptoui:
5332 if (isVectorType(DestTy)) {
5333 assert(DestTy == IceType_v4i32 && SrcType == IceType_v4f32);
5334 HelperName = H_fptoui_4xi32_f32;
5335 } else if (DestTy == IceType_i64 ||
5336 (!Traits::Is64Bit && DestTy == IceType_i32)) {
5337 if (Traits::Is64Bit) {
5338 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
5339 : H_fptoui_f64_i64;
5340 } else if (isInt32Asserting32Or64(DestTy)) {
5341 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
5342 : H_fptoui_f64_i32;
5343 } else {
5344 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
5345 : H_fptoui_f64_i64;
5346 }
5347 } else {
5348 return;
5349 }
5350 break;
5351 case InstCast::Sitofp:
5352 if (!Traits::Is64Bit && SrcType == IceType_i64) {
5353 HelperName = isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32
5354 : H_sitofp_i64_f64;
5355 } else {
5356 return;
5357 }
5358 break;
5359 case InstCast::Uitofp:
5360 if (isVectorType(SrcType)) {
5361 assert(DestTy == IceType_v4f32 && SrcType == IceType_v4i32);
5362 HelperName = H_uitofp_4xi32_4xf32;
5363 } else if (SrcType == IceType_i64 ||
5364 (!Traits::Is64Bit && SrcType == IceType_i32)) {
5365 if (isInt32Asserting32Or64(SrcType)) {
5366 HelperName = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32
5367 : H_uitofp_i32_f64;
5368 } else {
5369 HelperName = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32
5370 : H_uitofp_i64_f64;
5371 }
5372 } else {
5373 return;
5374 }
5375 break;
5376 case InstCast::Bitcast: {
5377 if (DestTy == Src0->getType())
5378 return;
5379 switch (DestTy) {
5380 default:
5381 return;
5382 case IceType_i8:
5383 assert(Src0->getType() == IceType_v8i1);
5384 HelperName = H_bitcast_8xi1_i8;
5385 break;
5386 case IceType_i16:
5387 assert(Src0->getType() == IceType_v16i1);
5388 HelperName = H_bitcast_16xi1_i16;
5389 break;
5390 case IceType_v8i1: {
5391 assert(Src0->getType() == IceType_i8);
5392 HelperName = H_bitcast_i8_8xi1;
5393 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
5394 // Arguments to functions are required to be at least 32 bits wide.
5395 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
5396 Src0 = Src0AsI32;
5397 } break;
5398 case IceType_v16i1: {
5399 assert(Src0->getType() == IceType_i16);
5400 HelperName = H_bitcast_i16_16xi1;
5401 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
5402 // Arguments to functions are required to be at least 32 bits wide.
5403 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
5404 Src0 = Src0AsI32;
5405 } break;
5406 }
5407 } break;
5408 }
5409 constexpr SizeT MaxSrcs = 1;
5410 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
5411 Call->addArg(Src0);
5412 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
5413 Context.insert(Call);
5414 Cast->setDeleted();
5415 } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) {
5416 std::vector<Type> ArgTypes;
5417 Type ReturnType = IceType_void;
5418 switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) {
5419 default:
5420 return;
5421 case Intrinsics::Ctpop: {
5422 Operand *Val = Intrinsic->getArg(0);
5423 Type ValTy = Val->getType();
5424 if (ValTy == IceType_i64)
5425 ArgTypes = {IceType_i64};
5426 else
5427 ArgTypes = {IceType_i32};
5428 ReturnType = IceType_i32;
5429 } break;
5430 case Intrinsics::Longjmp:
5431 ArgTypes = {IceType_i32, IceType_i32};
5432 ReturnType = IceType_void;
5433 break;
5434 case Intrinsics::Memcpy:
5435 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
5436 ReturnType = IceType_void;
5437 break;
5438 case Intrinsics::Memmove:
5439 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
5440 ReturnType = IceType_void;
5441 break;
5442 case Intrinsics::Memset:
5443 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
5444 ReturnType = IceType_void;
5445 break;
5446 case Intrinsics::NaClReadTP:
5447 ReturnType = IceType_i32;
5448 break;
5449 case Intrinsics::Setjmp:
5450 ArgTypes = {IceType_i32};
5451 ReturnType = IceType_i32;
5452 break;
5453 }
5454 StackArgumentsSize = getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
5455 } else if (auto *Call = llvm::dyn_cast<InstCall>(Instr)) {
5456 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
5457 } else if (auto *Ret = llvm::dyn_cast<InstRet>(Instr)) {
5458 if (!Ret->hasRetValue())
5459 return;
5460 Operand *RetValue = Ret->getRetValue();
5461 Type ReturnType = RetValue->getType();
5462 if (!isScalarFloatingType(ReturnType))
5463 return;
5464 StackArgumentsSize = typeWidthInBytes(ReturnType);
5465 } else {
5466 return;
5467 }
5468 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize);
5469 updateMaxOutArgsSizeBytes(StackArgumentsSize);
5470 }
5471
5472 template <class Machine>
5473 uint32_t TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(
5474 const std::vector<Type> &ArgTypes, Type ReturnType) {
5328 uint32_t OutArgumentsSizeBytes = 0; 5475 uint32_t OutArgumentsSizeBytes = 0;
5329 uint32_t XmmArgCount = 0; 5476 uint32_t XmmArgCount = 0;
5330 uint32_t GprArgCount = 0; 5477 uint32_t GprArgCount = 0;
5331 // Classify each argument operand according to the location where the 5478 for (Type Ty : ArgTypes) {
5332 // argument is passed.
5333 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
5334 Operand *Arg = Instr->getArg(i);
5335 Type Ty = Arg->getType();
5336 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 5479 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
5337 assert(typeWidthInBytes(Ty) >= 4); 5480 assert(typeWidthInBytes(Ty) >= 4);
5338 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) { 5481 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
5339 ++XmmArgCount; 5482 ++XmmArgCount;
5340 } else if (isScalarIntegerType(Ty) && 5483 } else if (isScalarIntegerType(Ty) &&
5341 GprArgCount < Traits::X86_MAX_GPR_ARGS) { 5484 GprArgCount < Traits::X86_MAX_GPR_ARGS) {
5342 // The 64 bit ABI allows some integers to be passed in GPRs. 5485 // The 64 bit ABI allows some integers to be passed in GPRs.
5343 ++GprArgCount; 5486 ++GprArgCount;
5344 } else { 5487 } else {
5345 if (isVectorType(Arg->getType())) { 5488 if (isVectorType(Ty)) {
5346 OutArgumentsSizeBytes = 5489 OutArgumentsSizeBytes =
5347 Traits::applyStackAlignment(OutArgumentsSizeBytes); 5490 Traits::applyStackAlignment(OutArgumentsSizeBytes);
5348 } 5491 }
5349 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 5492 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Ty);
5350 } 5493 }
5351 } 5494 }
5352 if (Traits::Is64Bit) 5495 if (Traits::Is64Bit)
5353 return OutArgumentsSizeBytes; 5496 return OutArgumentsSizeBytes;
5354 // The 32 bit ABI requires floating point values to be returned on the x87 FP 5497 // The 32 bit ABI requires floating point values to be returned on the x87 FP
5355 // stack. Ensure there is enough space for the fstp/movs for floating returns. 5498 // stack. Ensure there is enough space for the fstp/movs for floating returns.
5356 Variable *Dest = Instr->getDest(); 5499 if (ReturnType == IceType_void)
Jim Stichnoth 2015/11/26 18:32:06 This void test is redundant with the code below, c
sehr 2015/11/26 21:09:23 Removed.
5357 if (Dest == nullptr)
5358 return OutArgumentsSizeBytes; 5500 return OutArgumentsSizeBytes;
5359 const Type DestType = Dest->getType(); 5501 if (isScalarFloatingType(ReturnType)) {
5360 if (isScalarFloatingType(Dest->getType())) {
5361 OutArgumentsSizeBytes = 5502 OutArgumentsSizeBytes =
5362 std::max(OutArgumentsSizeBytes, 5503 std::max(OutArgumentsSizeBytes,
5363 static_cast<uint32_t>(typeWidthInBytesOnStack(DestType))); 5504 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType)));
5364 } 5505 }
5365 return OutArgumentsSizeBytes; 5506 return OutArgumentsSizeBytes;
5366 } 5507 }
5367 5508
5368 template <class Machine> 5509 template <class Machine>
5510 uint32_t
5511 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) {
5512 // Build a vector of the arguments' types.
5513 std::vector<Type> ArgTypes;
5514 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
5515 Operand *Arg = Instr->getArg(i);
5516 ArgTypes.emplace_back(Arg->getType());
5517 }
5518 // Compute the return type (if any).
5519 Type ReturnType = IceType_void;
5520 Variable *Dest = Instr->getDest();
5521 if (Dest != nullptr)
5522 ReturnType = Dest->getType();
5523 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
5524 }
5525
5526 template <class Machine>
5369 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) { 5527 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) {
5370 Variable *Reg = makeReg(Ty, RegNum); 5528 Variable *Reg = makeReg(Ty, RegNum);
5371 switch (Ty) { 5529 switch (Ty) {
5372 case IceType_i1: 5530 case IceType_i1:
5373 case IceType_i8: 5531 case IceType_i8:
5374 case IceType_i16: 5532 case IceType_i16:
5375 case IceType_i32: 5533 case IceType_i32:
5376 case IceType_i64: 5534 case IceType_i64:
5377 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. 5535 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.
5378 _mov(Reg, Ctx->getConstantZero(Ty)); 5536 _mov(Reg, Ctx->getConstantZero(Ty));
(...skipping 687 matching lines...) Expand 10 before | Expand all | Expand 10 after
6066 } 6224 }
6067 // the offset is not eligible for blinding or pooling, return the original 6225 // the offset is not eligible for blinding or pooling, return the original
6068 // mem operand 6226 // mem operand
6069 return MemOperand; 6227 return MemOperand;
6070 } 6228 }
6071 6229
6072 } // end of namespace X86Internal 6230 } // end of namespace X86Internal
6073 } // end of namespace Ice 6231 } // end of namespace Ice
6074 6232
6075 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 6233 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698