OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1832 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1843 if (!llvm::isa<Constant>(Src1)) | 1843 if (!llvm::isa<Constant>(Src1)) |
1844 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1844 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); |
1845 _sar(T, Src1); | 1845 _sar(T, Src1); |
1846 _mov(Dest, T); | 1846 _mov(Dest, T); |
1847 break; | 1847 break; |
1848 case InstArithmetic::Udiv: | 1848 case InstArithmetic::Udiv: |
1849 // div and idiv are the few arithmetic operators that do not allow | 1849 // div and idiv are the few arithmetic operators that do not allow |
1850 // immediates as the operand. | 1850 // immediates as the operand. |
1851 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1851 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1852 if (isByteSizedArithType(Dest->getType())) { | 1852 if (isByteSizedArithType(Dest->getType())) { |
1853 Variable *T_ah = nullptr; | 1853 // For 8-bit unsigned division we need to zero-extend al into ah. A mov |
1854 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1854 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 |
| 1855 // assembler refuses to encode %ah (encoding %spl with a REX prefix |
| 1856 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah |
| 1857 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and |
| 1858 // %d[lh], which means the X86 target lowering (and the register |
| 1859 // allocator) would have to be aware of this restriction. For now, we |
| 1860 // simply zero %eax completely, and move the dividend into %al. |
| 1861 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 1862 Context.insert(InstFakeDef::create(Func, T_eax)); |
| 1863 _xor(T_eax, T_eax); |
1855 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1864 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1856 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah); | 1865 _div(T, Src1, T); |
1857 _div(T, Src1, T_ah); | |
1858 _mov(Dest, T); | 1866 _mov(Dest, T); |
| 1867 Context.insert(InstFakeUse::create(Func, T_eax)); |
1859 } else { | 1868 } else { |
1860 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1869 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1861 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1870 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1862 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1871 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); |
1863 _div(T, Src1, T_edx); | 1872 _div(T, Src1, T_edx); |
1864 _mov(Dest, T); | 1873 _mov(Dest, T); |
1865 } | 1874 } |
1866 break; | 1875 break; |
1867 case InstArithmetic::Sdiv: | 1876 case InstArithmetic::Sdiv: |
1868 // TODO(stichnot): Enable this after doing better performance | 1877 // TODO(stichnot): Enable this after doing better performance |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1910 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 1919 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
1911 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1920 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1912 _cbwdq(T_edx, T); | 1921 _cbwdq(T_edx, T); |
1913 _idiv(T, Src1, T_edx); | 1922 _idiv(T, Src1, T_edx); |
1914 _mov(Dest, T); | 1923 _mov(Dest, T); |
1915 } | 1924 } |
1916 break; | 1925 break; |
1917 case InstArithmetic::Urem: | 1926 case InstArithmetic::Urem: |
1918 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1927 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1919 if (isByteSizedArithType(Dest->getType())) { | 1928 if (isByteSizedArithType(Dest->getType())) { |
1920 Variable *T_ah = nullptr; | 1929 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
1921 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1930 Context.insert(InstFakeDef::create(Func, T_eax)); |
| 1931 _xor(T_eax, T_eax); |
1922 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1932 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1923 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah); | 1933 Variable *T_al = makeReg(IceType_i8, Traits::RegisterSet::Reg_eax); |
1924 _div(T_ah, Src1, T); | 1934 _div(T_al, Src1, T); |
1925 _mov(Dest, T_ah); | 1935 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't |
| 1936 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
| 1937 // this ever becomes a problem we can introduce a pseudo rem instruction |
| 1938 // that returns the remainder in %al directly (and uses a mov for copying |
| 1939 // %ah to %al.) |
| 1940 static constexpr uint8_t AlSizeInBits = 8; |
| 1941 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
| 1942 _mov(Dest, T_al); |
| 1943 Context.insert(InstFakeUse::create(Func, T_eax)); |
1926 } else { | 1944 } else { |
1927 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1945 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1928 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1946 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); |
1929 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1947 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1930 _div(T_edx, Src1, T); | 1948 _div(T_edx, Src1, T); |
1931 _mov(Dest, T_edx); | 1949 _mov(Dest, T_edx); |
1932 } | 1950 } |
1933 break; | 1951 break; |
1934 case InstArithmetic::Srem: | 1952 case InstArithmetic::Srem: |
1935 // TODO(stichnot): Enable this after doing better performance | 1953 // TODO(stichnot): Enable this after doing better performance |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1967 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 1985 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
1968 _sub(T, Src0); | 1986 _sub(T, Src0); |
1969 _neg(T); | 1987 _neg(T); |
1970 _mov(Dest, T); | 1988 _mov(Dest, T); |
1971 return; | 1989 return; |
1972 } | 1990 } |
1973 } | 1991 } |
1974 } | 1992 } |
1975 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1993 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1976 if (isByteSizedArithType(Dest->getType())) { | 1994 if (isByteSizedArithType(Dest->getType())) { |
1977 Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah); | |
1978 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1995 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1996 // T is %al. |
1979 _cbwdq(T, T); | 1997 _cbwdq(T, T); |
1980 Context.insert(InstFakeDef::create(Func, T_ah)); | 1998 _idiv(T, Src1, T); |
1981 _idiv(T_ah, Src1, T); | 1999 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
1982 _mov(Dest, T_ah); | 2000 Context.insert(InstFakeDef::create(Func, T_eax)); |
| 2001 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't |
| 2002 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
| 2003 // this ever becomes a problem we can introduce a pseudo rem instruction |
| 2004 // that returns the remainder in %al directly (and uses a mov for copying |
| 2005 // %ah to %al.) |
| 2006 static constexpr uint8_t AlSizeInBits = 8; |
| 2007 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
| 2008 _mov(Dest, T); |
| 2009 Context.insert(InstFakeUse::create(Func, T_eax)); |
1983 } else { | 2010 } else { |
1984 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 2011 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
1985 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 2012 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1986 _cbwdq(T_edx, T); | 2013 _cbwdq(T_edx, T); |
1987 _idiv(T_edx, Src1, T); | 2014 _idiv(T_edx, Src1, T); |
1988 _mov(Dest, T_edx); | 2015 _mov(Dest, T_edx); |
1989 } | 2016 } |
1990 break; | 2017 break; |
1991 case InstArithmetic::Fadd: | 2018 case InstArithmetic::Fadd: |
1992 _mov(T, Src0); | 2019 _mov(T, Src0); |
(...skipping 3623 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5616 } | 5643 } |
5617 // the offset is not eligible for blinding or pooling, return the original | 5644 // the offset is not eligible for blinding or pooling, return the original |
5618 // mem operand | 5645 // mem operand |
5619 return MemOperand; | 5646 return MemOperand; |
5620 } | 5647 } |
5621 | 5648 |
5622 } // end of namespace X86Internal | 5649 } // end of namespace X86Internal |
5623 } // end of namespace Ice | 5650 } // end of namespace Ice |
5624 | 5651 |
5625 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5652 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |