src/IceTargetLoweringX86BaseImpl.h - Issue 1260163003: Subzero. Removes references to %ah.

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1260163003: Subzero. Removes references to %ah. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 1832 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1843 if (!llvm::isa<Constant>(Src1))	1843 if (!llvm::isa<Constant>(Src1))

1844 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);	1844 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);

1845 _sar(T, Src1);	1845 _sar(T, Src1);

1846 _mov(Dest, T);	1846 _mov(Dest, T);

1847 break;	1847 break;

1848 case InstArithmetic::Udiv:	1848 case InstArithmetic::Udiv:

1849 // div and idiv are the few arithmetic operators that do not allow	1849 // div and idiv are the few arithmetic operators that do not allow

1850 // immediates as the operand.	1850 // immediates as the operand.

1851 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1851 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1852 if (isByteSizedArithType(Dest->getType())) {	1852 if (isByteSizedArithType(Dest->getType())) {

1853 Variable *T_ah = nullptr;	1853 Variable *T_eax = makeReg(IceType_i16, Traits::RegisterSet::Reg_eax);
	Jim Stichnoth 2015/07/28 15:58:23 Add a comment explaining the ah workaround.
	jvoung (off chromium) 2015/07/28 16:17:45 For T_eax, why not use IceType_i32 instead of IceType_i16, like in the Urem case? Should be smaller without the 0x66 prefix?
	John 2015/07/28 18:21:16 On 2015/07/28 16:17:45, jvoung wrote: > For T_eax, why not use IceType_i32 instead of IceType_i16, like in the Urem > case? Should be smaller without the 0x66 prefix? Regardless of being smaller, using i32 is more efficient and what I intended. Thanks for catching this.
	John 2015/07/28 18:21:16 On 2015/07/28 15:58:23, stichnot wrote: > Add a comment explaining the ah workaround. Done.
1854 Constant *Zero = Ctx->getConstantZero(IceType_i8);	1854 Context.insert(InstFakeDef::create(Func, T_eax));

	1855 _xor(T_eax, T_eax);

1855 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1856 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1856 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);	1857 _div(T, Src1, T);

1857 _div(T, Src1, T_ah);

1858 _mov(Dest, T);	1858 _mov(Dest, T);

	1859 Context.insert(InstFakeUse::create(Func, T_eax));

1859 } else {	1860 } else {

1860 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1861 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1861 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1862 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1862 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);	1863 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);

1863 _div(T, Src1, T_edx);	1864 _div(T, Src1, T_edx);

1864 _mov(Dest, T);	1865 _mov(Dest, T);

1865 }	1866 }

1866 break;	1867 break;

1867 case InstArithmetic::Sdiv:	1868 case InstArithmetic::Sdiv:

1868 // TODO(stichnot): Enable this after doing better performance	1869 // TODO(stichnot): Enable this after doing better performance

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1910 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);	1911 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);

1911 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1912 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1912 _cbwdq(T_edx, T);	1913 _cbwdq(T_edx, T);

1913 _idiv(T, Src1, T_edx);	1914 _idiv(T, Src1, T_edx);

1914 _mov(Dest, T);	1915 _mov(Dest, T);

1915 }	1916 }

1916 break;	1917 break;

1917 case InstArithmetic::Urem:	1918 case InstArithmetic::Urem:

1918 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1919 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1919 if (isByteSizedArithType(Dest->getType())) {	1920 if (isByteSizedArithType(Dest->getType())) {

1920 Variable *T_ah = nullptr;	1921 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);

1921 Constant *Zero = Ctx->getConstantZero(IceType_i8);	1922 Context.insert(InstFakeDef::create(Func, T_eax));

	1923 _xor(T_eax, T_eax);

	1924 _set_dest_nonkillable();
	Jim Stichnoth 2015/07/28 15:58:23 I don't think any of these new instances of _set_dest_nonkillable() are needed, can you double-check? That method is poorly named, sorry about that. Generally you only need to call it after inserting an instruction where the dest variable appears in a source operand of the instruction except for Dest==Srcs[0]. The Dest==Srcs[0] case is handled as part of postLower(). For any other source operands, or if Srcs[0] is e.g. a mem operand that contains Dest (such as in some lea cases), calling it is needed.
	John 2015/07/28 18:21:16 On 2015/07/28 15:58:23, stichnot wrote: > I don't think any of these new instances of _set_dest_nonkillable() are needed, > can you double-check? > > That method is poorly named, sorry about that. Generally you only need to call > it after inserting an instruction where the dest variable appears in a source > operand of the instruction except for Dest==Srcs[0]. The Dest==Srcs[0] case is > handled as part of postLower(). For any other source operands, or if Srcs[0] is > e.g. a mem operand that contains Dest (such as in some lea cases), calling it is > needed. Done.
1922 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1925 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1923 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);	1926 Variable *T_al = makeReg(IceType_i8, Traits::RegisterSet::Reg_eax);

1924 _div(T_ah, Src1, T);	1927 _div(T_al, Src1, T);

1925 _mov(Dest, T_ah);	1928 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't

	1929 // mov %ah, %al because it would make x86-64 codegen more complicated. If

	1930 // this ever becomes a problem we can introduce a pseudo rem instruction

	1931 // that returns the remainder in %al directly (and uses a mov for copying

	1932 // %ah to %al.)

	1933 _shr(T_eax, Ctx->getConstantInt8(8));
	Jim Stichnoth 2015/07/28 15:58:23 Consider using X86_CHAR_BIT instead of 8.
	John 2015/07/28 18:21:16 On 2015/07/28 15:58:23, stichnot wrote: > Consider using X86_CHAR_BIT instead of 8. The name X86_CHAR_BIT is somewhat confusing. In C, CHAR_BIT represents the number of bits in a char (which you can't portably trust to always be 8). I would rather leave an 8 (or use a named constant, e.g., AlSizeInBytes). In the future I would like to rename X86_CHAR_BIT to X86_BYTE_SIZE or something else.
	1934 _mov(Dest, T_al);

	1935 Context.insert(InstFakeUse::create(Func, T_eax));

1926 } else {	1936 } else {

1927 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1937 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1928 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);	1938 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);

1929 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1939 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1930 _div(T_edx, Src1, T);	1940 _div(T_edx, Src1, T);

1931 _mov(Dest, T_edx);	1941 _mov(Dest, T_edx);

1932 }	1942 }

1933 break;	1943 break;

1934 case InstArithmetic::Srem:	1944 case InstArithmetic::Srem:

1935 // TODO(stichnot): Enable this after doing better performance	1945 // TODO(stichnot): Enable this after doing better performance

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1967 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));	1977 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));

1968 _sub(T, Src0);	1978 _sub(T, Src0);

1969 _neg(T);	1979 _neg(T);

1970 _mov(Dest, T);	1980 _mov(Dest, T);

1971 return;	1981 return;

1972 }	1982 }

1973 }	1983 }

1974 }	1984 }

1975 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1985 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1976 if (isByteSizedArithType(Dest->getType())) {	1986 if (isByteSizedArithType(Dest->getType())) {

1977 Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah);

1978 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1987 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

	1988 // T is %al.

	1989 _set_dest_nonkillable();

1979 _cbwdq(T, T);	1990 _cbwdq(T, T);

1980 Context.insert(InstFakeDef::create(Func, T_ah));	1991 _idiv(T, Src1, T);

1981 _idiv(T_ah, Src1, T);	1992 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);

1982 _mov(Dest, T_ah);	1993 Context.insert(InstFakeDef::create(Func, T_eax));

	1994 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't

	1995 // mov %ah, %al because it would make x86-64 codegen more complicated. If

	1996 // this ever becomes a problem we can introduce a pseudo rem instruction

	1997 // that returns the remainder in %al directly (and uses a mov for copying

	1998 // %ah to %al.)

	1999 _shr(T_eax, Ctx->getConstantInt8(8));

	2000 _mov(Dest, T);

	2001 Context.insert(InstFakeUse::create(Func, T_eax));

1983 } else {	2002 } else {

1984 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);	2003 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);

1985 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	2004 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1986 _cbwdq(T_edx, T);	2005 _cbwdq(T_edx, T);

1987 _idiv(T_edx, Src1, T);	2006 _idiv(T_edx, Src1, T);

1988 _mov(Dest, T_edx);	2007 _mov(Dest, T_edx);

1989 }	2008 }

1990 break;	2009 break;

1991 case InstArithmetic::Fadd:	2010 case InstArithmetic::Fadd:

1992 _mov(T, Src0);	2011 _mov(T, Src0);

(...skipping 3623 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5616 }	5635 }

5617 // the offset is not eligible for blinding or pooling, return the original	5636 // the offset is not eligible for blinding or pooling, return the original

5618 // mem operand	5637 // mem operand

5619 return MemOperand;	5638 return MemOperand;

5620 }	5639 }

5621	5640

5622 } // end of namespace X86Internal	5641 } // end of namespace X86Internal

5623 } // end of namespace Ice	5642 } // end of namespace Ice

5624	5643

5625 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	5644 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« src/IceRegistersX8632.h ('K') | « src/IceRegistersX8664.h ('k') | no next file » | no next file with comments »