| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1832 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1843 if (!llvm::isa<Constant>(Src1)) | 1843 if (!llvm::isa<Constant>(Src1)) |
| 1844 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1844 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); |
| 1845 _sar(T, Src1); | 1845 _sar(T, Src1); |
| 1846 _mov(Dest, T); | 1846 _mov(Dest, T); |
| 1847 break; | 1847 break; |
| 1848 case InstArithmetic::Udiv: | 1848 case InstArithmetic::Udiv: |
| 1849 // div and idiv are the few arithmetic operators that do not allow | 1849 // div and idiv are the few arithmetic operators that do not allow |
| 1850 // immediates as the operand. | 1850 // immediates as the operand. |
| 1851 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1851 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1852 if (isByteSizedArithType(Dest->getType())) { | 1852 if (isByteSizedArithType(Dest->getType())) { |
| 1853 Variable *T_ah = nullptr; | 1853 // For 8-bit unsigned division we need to zero-extend al into ah. A mov |
| 1854 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1854 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 |
| 1855 // assembler refuses to encode %ah (encoding %spl with a REX prefix |
| 1856 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah |
| 1857 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and |
| 1858 // %d[lh], which means the X86 target lowering (and the register |
| 1859 // allocator) would have to be aware of this restriction. For now, we |
| 1860 // simply zero %eax completely, and move the dividend into %al. |
| 1861 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 1862 Context.insert(InstFakeDef::create(Func, T_eax)); |
| 1863 _xor(T_eax, T_eax); |
| 1855 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1864 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1856 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah); | 1865 _div(T, Src1, T); |
| 1857 _div(T, Src1, T_ah); | |
| 1858 _mov(Dest, T); | 1866 _mov(Dest, T); |
| 1867 Context.insert(InstFakeUse::create(Func, T_eax)); |
| 1859 } else { | 1868 } else { |
| 1860 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1869 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1861 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1870 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1862 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1871 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); |
| 1863 _div(T, Src1, T_edx); | 1872 _div(T, Src1, T_edx); |
| 1864 _mov(Dest, T); | 1873 _mov(Dest, T); |
| 1865 } | 1874 } |
| 1866 break; | 1875 break; |
| 1867 case InstArithmetic::Sdiv: | 1876 case InstArithmetic::Sdiv: |
| 1868 // TODO(stichnot): Enable this after doing better performance | 1877 // TODO(stichnot): Enable this after doing better performance |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1910 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 1919 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 1911 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1920 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1912 _cbwdq(T_edx, T); | 1921 _cbwdq(T_edx, T); |
| 1913 _idiv(T, Src1, T_edx); | 1922 _idiv(T, Src1, T_edx); |
| 1914 _mov(Dest, T); | 1923 _mov(Dest, T); |
| 1915 } | 1924 } |
| 1916 break; | 1925 break; |
| 1917 case InstArithmetic::Urem: | 1926 case InstArithmetic::Urem: |
| 1918 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1927 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1919 if (isByteSizedArithType(Dest->getType())) { | 1928 if (isByteSizedArithType(Dest->getType())) { |
| 1920 Variable *T_ah = nullptr; | 1929 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 1921 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1930 Context.insert(InstFakeDef::create(Func, T_eax)); |
| 1931 _xor(T_eax, T_eax); |
| 1922 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1932 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1923 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah); | 1933 Variable *T_al = makeReg(IceType_i8, Traits::RegisterSet::Reg_eax); |
| 1924 _div(T_ah, Src1, T); | 1934 _div(T_al, Src1, T); |
| 1925 _mov(Dest, T_ah); | 1935 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't |
| 1936 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
| 1937 // this ever becomes a problem we can introduce a pseudo rem instruction |
| 1938 // that returns the remainder in %al directly (and uses a mov for copying |
| 1939 // %ah to %al.) |
| 1940 static constexpr uint8_t AlSizeInBits = 8; |
| 1941 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
| 1942 _mov(Dest, T_al); |
| 1943 Context.insert(InstFakeUse::create(Func, T_eax)); |
| 1926 } else { | 1944 } else { |
| 1927 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1945 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1928 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1946 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); |
| 1929 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1947 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1930 _div(T_edx, Src1, T); | 1948 _div(T_edx, Src1, T); |
| 1931 _mov(Dest, T_edx); | 1949 _mov(Dest, T_edx); |
| 1932 } | 1950 } |
| 1933 break; | 1951 break; |
| 1934 case InstArithmetic::Srem: | 1952 case InstArithmetic::Srem: |
| 1935 // TODO(stichnot): Enable this after doing better performance | 1953 // TODO(stichnot): Enable this after doing better performance |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1967 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 1985 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
| 1968 _sub(T, Src0); | 1986 _sub(T, Src0); |
| 1969 _neg(T); | 1987 _neg(T); |
| 1970 _mov(Dest, T); | 1988 _mov(Dest, T); |
| 1971 return; | 1989 return; |
| 1972 } | 1990 } |
| 1973 } | 1991 } |
| 1974 } | 1992 } |
| 1975 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1993 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1976 if (isByteSizedArithType(Dest->getType())) { | 1994 if (isByteSizedArithType(Dest->getType())) { |
| 1977 Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah); | |
| 1978 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1995 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1996 // T is %al. |
| 1979 _cbwdq(T, T); | 1997 _cbwdq(T, T); |
| 1980 Context.insert(InstFakeDef::create(Func, T_ah)); | 1998 _idiv(T, Src1, T); |
| 1981 _idiv(T_ah, Src1, T); | 1999 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 1982 _mov(Dest, T_ah); | 2000 Context.insert(InstFakeDef::create(Func, T_eax)); |
| 2001 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't |
| 2002 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
| 2003 // this ever becomes a problem we can introduce a pseudo rem instruction |
| 2004 // that returns the remainder in %al directly (and uses a mov for copying |
| 2005 // %ah to %al.) |
| 2006 static constexpr uint8_t AlSizeInBits = 8; |
| 2007 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
| 2008 _mov(Dest, T); |
| 2009 Context.insert(InstFakeUse::create(Func, T_eax)); |
| 1983 } else { | 2010 } else { |
| 1984 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 2011 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 1985 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 2012 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1986 _cbwdq(T_edx, T); | 2013 _cbwdq(T_edx, T); |
| 1987 _idiv(T_edx, Src1, T); | 2014 _idiv(T_edx, Src1, T); |
| 1988 _mov(Dest, T_edx); | 2015 _mov(Dest, T_edx); |
| 1989 } | 2016 } |
| 1990 break; | 2017 break; |
| 1991 case InstArithmetic::Fadd: | 2018 case InstArithmetic::Fadd: |
| 1992 _mov(T, Src0); | 2019 _mov(T, Src0); |
| (...skipping 3623 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5616 } | 5643 } |
| 5617 // the offset is not eligible for blinding or pooling, return the original | 5644 // the offset is not eligible for blinding or pooling, return the original |
| 5618 // mem operand | 5645 // mem operand |
| 5619 return MemOperand; | 5646 return MemOperand; |
| 5620 } | 5647 } |
| 5621 | 5648 |
| 5622 } // end of namespace X86Internal | 5649 } // end of namespace X86Internal |
| 5623 } // end of namespace Ice | 5650 } // end of namespace Ice |
| 5624 | 5651 |
| 5625 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5652 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |