Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(166)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1531623007: Add option to force filetype=asm for testing (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix merge ordering issue. Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1117 matching lines...) Expand 10 before | Expand all | Expand 10 after
1128 Src1 /= 3; 1128 Src1 /= 3;
1129 } else if (Src1 % 2 == 0) { 1129 } else if (Src1 % 2 == 0) {
1130 if (Count2 == 0) 1130 if (Count2 == 0)
1131 ++CountOps; 1131 ++CountOps;
1132 ++Count2; 1132 ++Count2;
1133 Src1 /= 2; 1133 Src1 /= 2;
1134 } else { 1134 } else {
1135 return false; 1135 return false;
1136 } 1136 }
1137 } 1137 }
1138 // Lea optimization only works for i16 and i32 types, not i8. 1138 // Lea optimization only works for the i32 type, not i8 or i16.
Jim Stichnoth 2016/01/10 03:08:56 i32 type
sehr 2016/01/11 21:49:48 Done.
1139 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) 1139 if (Ty != IceType_i32 && (Count3 || Count5 || Count9))
1140 return false; 1140 return false;
1141 // Limit the number of lea/shl operations for a single multiply, to a 1141 // Limit the number of lea/shl operations for a single multiply, to a
1142 // somewhat arbitrary choice of 3. 1142 // somewhat arbitrary choice of 3.
1143 constexpr uint32_t MaxOpsForOptimizedMul = 3; 1143 constexpr uint32_t MaxOpsForOptimizedMul = 3;
1144 if (CountOps > MaxOpsForOptimizedMul) 1144 if (CountOps > MaxOpsForOptimizedMul)
1145 return false; 1145 return false;
1146 _mov(T, Src0); 1146 _mov(T, Src0);
1147 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1147 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1148 for (uint32_t i = 0; i < Count9; ++i) { 1148 for (uint32_t i = 0; i < Count9; ++i) {
1149 constexpr uint16_t Shift = 3; // log2(9-1) 1149 constexpr uint16_t Shift = 3; // log2(9-1)
(...skipping 4684 matching lines...) Expand 10 before | Expand all | Expand 10 after
5834 int32_t RegNum) { 5834 int32_t RegNum) {
5835 return makeZeroedRegister(Ty, RegNum); 5835 return makeZeroedRegister(Ty, RegNum);
5836 } 5836 }
5837 5837
5838 template <typename TraitsType> 5838 template <typename TraitsType>
5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, 5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,
5840 int32_t RegNum) { 5840 int32_t RegNum) {
5841 Variable *MinusOnes = makeReg(Ty, RegNum); 5841 Variable *MinusOnes = makeReg(Ty, RegNum);
5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated. 5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
5843 Context.insert<InstFakeDef>(MinusOnes); 5843 Context.insert<InstFakeDef>(MinusOnes);
5844 _pcmpeq(MinusOnes, MinusOnes); 5844 if (Ty == IceType_f64)
5845 // Making a vector of minus ones of type f64 is currently only used for the
5846 // fabs intrinsic. To use the f64 type to create this mask with pcmpeqq
5847 // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the
5848 // same job and only requires SSE2.
5849 _pcmpeq(MinusOnes, MinusOnes, IceType_f32);
5850 else
5851 _pcmpeq(MinusOnes, MinusOnes);
5845 return MinusOnes; 5852 return MinusOnes;
5846 } 5853 }
5847 5854
5848 template <typename TraitsType> 5855 template <typename TraitsType>
5849 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { 5856 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
5850 Variable *Dest = makeVectorOfZeros(Ty, RegNum); 5857 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
5851 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 5858 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
5852 _psub(Dest, MinusOne); 5859 _psub(Dest, MinusOne);
5853 return Dest; 5860 return Dest;
5854 } 5861 }
(...skipping 700 matching lines...) Expand 10 before | Expand all | Expand 10 after
6555 } 6562 }
6556 // the offset is not eligible for blinding or pooling, return the original 6563 // the offset is not eligible for blinding or pooling, return the original
6557 // mem operand 6564 // mem operand
6558 return MemOperand; 6565 return MemOperand;
6559 } 6566 }
6560 6567
6561 } // end of namespace X86NAMESPACE 6568 } // end of namespace X86NAMESPACE
6562 } // end of namespace Ice 6569 } // end of namespace Ice
6563 6570
6564 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 6571 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698