src/IceTargetLoweringX86BaseImpl.h - Issue 1531623007: Add option to force filetype=asm for testing

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1531623007: Add option to force filetype=asm for testing (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Fix merge ordering issue. Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 1117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1128 Src1 /= 3;	1128 Src1 /= 3;

1129 } else if (Src1 % 2 == 0) {	1129 } else if (Src1 % 2 == 0) {

1130 if (Count2 == 0)	1130 if (Count2 == 0)

1131 ++CountOps;	1131 ++CountOps;

1132 ++Count2;	1132 ++Count2;

1133 Src1 /= 2;	1133 Src1 /= 2;

1134 } else {	1134 } else {

1135 return false;	1135 return false;

1136 }	1136 }

1137 }	1137 }

1138 // Lea optimization only works for i16 and i32 types, not i8.	1138 // Lea optimization only works for i32 types, not i8 or i16.
	Jim Stichnoth 2016/01/10 03:08:56: i32 type
	sehr 2016/01/11 21:49:48: On 2016/01/10 03:08:56, stichnot wrote: > i32 type -- Done.
1139 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))	1139 if (Ty != IceType_i32 && (Count3 || Count5 || Count9))

1140 return false;	1140 return false;

1141 // Limit the number of lea/shl operations for a single multiply, to a	1141 // Limit the number of lea/shl operations for a single multiply, to a

1142 // somewhat arbitrary choice of 3.	1142 // somewhat arbitrary choice of 3.

1143 constexpr uint32_t MaxOpsForOptimizedMul = 3;	1143 constexpr uint32_t MaxOpsForOptimizedMul = 3;

1144 if (CountOps > MaxOpsForOptimizedMul)	1144 if (CountOps > MaxOpsForOptimizedMul)

1145 return false;	1145 return false;

1146 _mov(T, Src0);	1146 _mov(T, Src0);

1147 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1147 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1148 for (uint32_t i = 0; i < Count9; ++i) {	1148 for (uint32_t i = 0; i < Count9; ++i) {

1149 constexpr uint16_t Shift = 3; // log2(9-1)	1149 constexpr uint16_t Shift = 3; // log2(9-1)

(...skipping 4684 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5834 int32_t RegNum) {	5834 int32_t RegNum) {

5835 return makeZeroedRegister(Ty, RegNum);	5835 return makeZeroedRegister(Ty, RegNum);

5836 }	5836 }

5837	5837

5838 template <typename TraitsType>	5838 template <typename TraitsType>

5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,	5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,

5840 int32_t RegNum) {	5840 int32_t RegNum) {

5841 Variable *MinusOnes = makeReg(Ty, RegNum);	5841 Variable *MinusOnes = makeReg(Ty, RegNum);

5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated.	5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated.

5843 Context.insert<InstFakeDef>(MinusOnes);	5843 Context.insert<InstFakeDef>(MinusOnes);

5844 _pcmpeq(MinusOnes, MinusOnes);	5844 if (Ty == IceType_f64)

	5845 // Making a vector of minus ones of type f64 is currently only used for the

	5846 // fabs intrinsic. To use the f64 type to create this mask with pcmpeqq

	5847 // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the

	5848 // same job and only requires SSE2.

	5849 _pcmpeq(MinusOnes, MinusOnes, IceType_f32);

	5850 else

	5851 _pcmpeq(MinusOnes, MinusOnes);

5845 return MinusOnes;	5852 return MinusOnes;

5846 }	5853 }

5847	5854

5848 template <typename TraitsType>	5855 template <typename TraitsType>

5849 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) {	5856 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) {

5850 Variable *Dest = makeVectorOfZeros(Ty, RegNum);	5857 Variable *Dest = makeVectorOfZeros(Ty, RegNum);

5851 Variable *MinusOne = makeVectorOfMinusOnes(Ty);	5858 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

5852 _psub(Dest, MinusOne);	5859 _psub(Dest, MinusOne);

5853 return Dest;	5860 return Dest;

5854 }	5861 }

(...skipping 700 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
6555 }	6562 }

6556 // the offset is not eligible for blinding or pooling, return the original	6563 // the offset is not eligible for blinding or pooling, return the original

6557 // mem operand	6564 // mem operand

6558 return MemOperand;	6565 return MemOperand;

6559 }	6566 }

6560	6567

6561 } // end of namespace X86NAMESPACE	6568 } // end of namespace X86NAMESPACE

6562 } // end of namespace Ice	6569 } // end of namespace Ice

6563	6570

6564 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	6571 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« src/IceTargetLoweringX8632Traits.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/assembler/x86/jump_encodings.ll » ('j') | tests_lit/assembler/x86/jump_encodings.ll » ('J')