src/IceTargetLoweringX86BaseImpl.h - Issue 1260163003: Subzero. Removes references to %ah.

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1260163003: Subzero. Removes references to %ah. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: git pull Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 1832 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1843 if (!llvm::isa<Constant>(Src1))	1843 if (!llvm::isa<Constant>(Src1))

1844 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);	1844 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);

1845 _sar(T, Src1);	1845 _sar(T, Src1);

1846 _mov(Dest, T);	1846 _mov(Dest, T);

1847 break;	1847 break;

1848 case InstArithmetic::Udiv:	1848 case InstArithmetic::Udiv:

1849 // div and idiv are the few arithmetic operators that do not allow	1849 // div and idiv are the few arithmetic operators that do not allow

1850 // immediates as the operand.	1850 // immediates as the operand.

1851 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1851 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1852 if (isByteSizedArithType(Dest->getType())) {	1852 if (isByteSizedArithType(Dest->getType())) {

1853 Variable *T_ah = nullptr;	1853 // For 8-bit unsigned division we need to zero-extend al into ah. A mov

1854 Constant *Zero = Ctx->getConstantZero(IceType_i8);	1854 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64

	1855 // assembler refuses to encode %ah (encoding %spl with a REX prefix

	1856 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah

	1857 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and

	1858 // %d[lh], which means the X86 target lowering (and the register

	1859 // allocator) would have to be aware of this restriction. For now, we

	1860 // simply zero %eax completely, and move the dividend into %al.

	1861 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);

	1862 Context.insert(InstFakeDef::create(Func, T_eax));

	1863 _xor(T_eax, T_eax);

1855 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1864 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1856 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);	1865 _div(T, Src1, T);

1857 _div(T, Src1, T_ah);

1858 _mov(Dest, T);	1866 _mov(Dest, T);

	1867 Context.insert(InstFakeUse::create(Func, T_eax));

1859 } else {	1868 } else {

1860 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1869 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1861 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1870 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1862 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);	1871 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);

1863 _div(T, Src1, T_edx);	1872 _div(T, Src1, T_edx);

1864 _mov(Dest, T);	1873 _mov(Dest, T);

1865 }	1874 }

1866 break;	1875 break;

1867 case InstArithmetic::Sdiv:	1876 case InstArithmetic::Sdiv:

1868 // TODO(stichnot): Enable this after doing better performance	1877 // TODO(stichnot): Enable this after doing better performance

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1910 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);	1919 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);

1911 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1920 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1912 _cbwdq(T_edx, T);	1921 _cbwdq(T_edx, T);

1913 _idiv(T, Src1, T_edx);	1922 _idiv(T, Src1, T_edx);

1914 _mov(Dest, T);	1923 _mov(Dest, T);

1915 }	1924 }

1916 break;	1925 break;

1917 case InstArithmetic::Urem:	1926 case InstArithmetic::Urem:

1918 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1927 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1919 if (isByteSizedArithType(Dest->getType())) {	1928 if (isByteSizedArithType(Dest->getType())) {

1920 Variable *T_ah = nullptr;	1929 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);

1921 Constant *Zero = Ctx->getConstantZero(IceType_i8);	1930 Context.insert(InstFakeDef::create(Func, T_eax));

	1931 _xor(T_eax, T_eax);

1922 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1932 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1923 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);	1933 Variable *T_al = makeReg(IceType_i8, Traits::RegisterSet::Reg_eax);

1924 _div(T_ah, Src1, T);	1934 _div(T_al, Src1, T);

1925 _mov(Dest, T_ah);	1935 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't

	1936 // mov %ah, %al because it would make x86-64 codegen more complicated. If

	1937 // this ever becomes a problem we can introduce a pseudo rem instruction

	1938 // that returns the remainder in %al directly (and uses a mov for copying

	1939 // %ah to %al.)

	1940 static constexpr uint8_t AlSizeInBits = 8;

	1941 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));

	1942 _mov(Dest, T_al);

	1943 Context.insert(InstFakeUse::create(Func, T_eax));

1926 } else {	1944 } else {

1927 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1945 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1928 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);	1946 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);

1929 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1947 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1930 _div(T_edx, Src1, T);	1948 _div(T_edx, Src1, T);

1931 _mov(Dest, T_edx);	1949 _mov(Dest, T_edx);

1932 }	1950 }

1933 break;	1951 break;

1934 case InstArithmetic::Srem:	1952 case InstArithmetic::Srem:

1935 // TODO(stichnot): Enable this after doing better performance	1953 // TODO(stichnot): Enable this after doing better performance

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1967 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));	1985 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));

1968 _sub(T, Src0);	1986 _sub(T, Src0);

1969 _neg(T);	1987 _neg(T);

1970 _mov(Dest, T);	1988 _mov(Dest, T);

1971 return;	1989 return;

1972 }	1990 }

1973 }	1991 }

1974 }	1992 }

1975 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1993 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1976 if (isByteSizedArithType(Dest->getType())) {	1994 if (isByteSizedArithType(Dest->getType())) {

1977 Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah);

1978 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1995 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

	1996 // T is %al.

1979 _cbwdq(T, T);	1997 _cbwdq(T, T);

1980 Context.insert(InstFakeDef::create(Func, T_ah));	1998 _idiv(T, Src1, T);

1981 _idiv(T_ah, Src1, T);	1999 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);

1982 _mov(Dest, T_ah);	2000 Context.insert(InstFakeDef::create(Func, T_eax));

	2001 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't

	2002 // mov %ah, %al because it would make x86-64 codegen more complicated. If

	2003 // this ever becomes a problem we can introduce a pseudo rem instruction

	2004 // that returns the remainder in %al directly (and uses a mov for copying

	2005 // %ah to %al.)

	2006 static constexpr uint8_t AlSizeInBits = 8;

	2007 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));

	2008 _mov(Dest, T);

	2009 Context.insert(InstFakeUse::create(Func, T_eax));

1983 } else {	2010 } else {

1984 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);	2011 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);

1985 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	2012 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1986 _cbwdq(T_edx, T);	2013 _cbwdq(T_edx, T);

1987 _idiv(T_edx, Src1, T);	2014 _idiv(T_edx, Src1, T);

1988 _mov(Dest, T_edx);	2015 _mov(Dest, T_edx);

1989 }	2016 }

1990 break;	2017 break;

1991 case InstArithmetic::Fadd:	2018 case InstArithmetic::Fadd:

1992 _mov(T, Src0);	2019 _mov(T, Src0);

(...skipping 3623 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5616 }	5643 }

5617 // the offset is not eligible for blinding or pooling, return the original	5644 // the offset is not eligible for blinding or pooling, return the original

5618 // mem operand	5645 // mem operand

5619 return MemOperand;	5646 return MemOperand;

5620 }	5647 }

5621	5648

5622 } // end of namespace X86Internal	5649 } // end of namespace X86Internal

5623 } // end of namespace Ice	5650 } // end of namespace Ice

5624	5651

5625 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	5652 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« no previous file with comments | « src/IceRegistersX8664.h ('k') | no next file » | no next file with comments »