OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 42 matching lines...)
53 const struct TableFcmp_ { | 53 const struct TableFcmp_ { |
54 uint32_t Default; | 54 uint32_t Default; |
55 bool SwapScalarOperands; | 55 bool SwapScalarOperands; |
56 CondX86::BrCond C1, C2; | 56 CondX86::BrCond C1, C2; |
57 bool SwapVectorOperands; | 57 bool SwapVectorOperands; |
58 CondX86::CmppsCond Predicate; | 58 CondX86::CmppsCond Predicate; |
59 } TableFcmp[] = { | 59 } TableFcmp[] = { |
60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | 60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \ | 61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \ |
62 , | 62 , |
63 FCMPX8632_TABLE | 63 FCMPX8632_TABLE |
64 #undef X | 64 #undef X |
65 }; | 65 }; |
66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); | 66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); |
67 | 67 |
68 // The following table summarizes the logic for lowering the icmp instruction | 68 // The following table summarizes the logic for lowering the icmp instruction |
69 // for i32 and narrower types. Each icmp condition has a clear mapping to an | 69 // for i32 and narrower types. Each icmp condition has a clear mapping to an |
70 // x86 conditional branch instruction. | 70 // x86 conditional branch instruction. |
71 | 71 |
72 const struct TableIcmp32_ { | 72 const struct TableIcmp32_ { |
73 CondX86::BrCond Mapping; | 73 CondX86::BrCond Mapping; |
74 } TableIcmp32[] = { | 74 } TableIcmp32[] = { |
75 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 75 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
(...skipping 71 matching lines...)
147 } | 147 } |
148 | 148 |
149 // Value is in bytes. Return Value adjusted to the next highest multiple | 149 // Value is in bytes. Return Value adjusted to the next highest multiple |
150 // of the stack alignment. | 150 // of the stack alignment. |
151 uint32_t applyStackAlignment(uint32_t Value) { | 151 uint32_t applyStackAlignment(uint32_t Value) { |
152 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 152 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
153 } | 153 } |
154 | 154 |
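For reference, a minimal sketch of the round-up computation that a helper like applyAlignment performs (the helper's body is elided above; roundUpToAlignment is a hypothetical stand-in, and Align is assumed to be a power of two):

#include <cstdint>

// Hypothetical stand-in for applyAlignment: round Value up to the next
// multiple of Align, assuming Align is a power of two.
inline uint32_t roundUpToAlignment(uint32_t Value, uint32_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}
// e.g. roundUpToAlignment(13, 16) == 16 and roundUpToAlignment(32, 16) == 32.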
155 // Instruction set options | 155 // Instruction set options |
156 namespace cl = ::llvm::cl; | 156 namespace cl = ::llvm::cl; |
157 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | 157 cl::opt<TargetX8632::X86InstructionSet> |
158 "mattr", cl::desc("X86 target attributes"), | 158 CLInstructionSet("mattr", cl::desc("X86 target attributes"), |
159 cl::init(TargetX8632::SSE2), | 159 cl::init(TargetX8632::SSE2), |
160 cl::values( | 160 cl::values(clEnumValN(TargetX8632::SSE2, "sse2", |
161 clEnumValN(TargetX8632::SSE2, "sse2", | 161 "Enable SSE2 instructions (default)"), |
162 "Enable SSE2 instructions (default)"), | 162 clEnumValN(TargetX8632::SSE4_1, "sse4.1", |
163 clEnumValN(TargetX8632::SSE4_1, "sse4.1", | 163 "Enable SSE 4.1 instructions"), |
164 "Enable SSE 4.1 instructions"), clEnumValEnd)); | 164 clEnumValEnd)); |
165 | 165 |
166 // In some cases, there are x-macros tables for both high-level and | 166 // In some cases, there are x-macros tables for both high-level and |
167 // low-level instructions/operands that use the same enum key value. | 167 // low-level instructions/operands that use the same enum key value. |
168 // The tables are kept separate to maintain a proper separation | 168 // The tables are kept separate to maintain a proper separation |
169 // between abstraction layers. There is a risk that the tables | 169 // between abstraction layers. There is a risk that the tables could |
170 // could get out of sync if enum values are reordered or if entries | 170 // get out of sync if enum values are reordered or if entries are |
171 // are added or deleted. This dummy function uses static_assert to | 171 // added or deleted. The following dummy namespaces use |
172 // ensure everything is kept in sync. | 172 // static_asserts to ensure everything is kept in sync. |
173 void __attribute__((unused)) xMacroIntegrityCheck() { | 173 |
174 // Validate the enum values in FCMPX8632_TABLE. | 174 // Validate the enum values in FCMPX8632_TABLE. |
175 { | 175 namespace dummy1 { |
176 // Define a temporary set of enum values based on low-level | 176 // Define a temporary set of enum values based on low-level table |
177 // table entries. | 177 // entries. |
178 enum _tmp_enum { | 178 enum _tmp_enum { |
179 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, | 179 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, |
180 FCMPX8632_TABLE | 180 FCMPX8632_TABLE |
181 #undef X | 181 #undef X |
182 _num | 182 _num |
183 }; | 183 }; |
184 // Define a set of constants based on high-level table entries. | 184 // Define a set of constants based on high-level table entries. |
185 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; | 185 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; |
186 ICEINSTFCMP_TABLE; | 186 ICEINSTFCMP_TABLE; |
187 #undef X | 187 #undef X |
188 // Define a set of constants based on low-level table entries, | 188 // Define a set of constants based on low-level table entries, and |
189 // and ensure the table entry keys are consistent. | 189 // ensure the table entry keys are consistent. |
190 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | 190 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
191 static const int _table2_##val = _tmp_##val; \ | 191 static const int _table2_##val = _tmp_##val; \ |
192 STATIC_ASSERT(_table1_##val == _table2_##val); | 192 static_assert( \ |
193 FCMPX8632_TABLE; | 193 _table1_##val == _table2_##val, \ |
| 194 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); |
| 195 FCMPX8632_TABLE; |
194 #undef X | 196 #undef X |
195 // Repeat the static asserts with respect to the high-level | 197 // Repeat the static asserts with respect to the high-level table |
196 // table entries in case the high-level table has extra entries. | 198 // entries in case the high-level table has extra entries. |
197 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | 199 #define X(tag, str) \ |
198 ICEINSTFCMP_TABLE; | 200 static_assert( \ |
| 201 _table1_##tag == _table2_##tag, \ |
| 202 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); |
| 203 ICEINSTFCMP_TABLE; |
199 #undef X | 204 #undef X |
200 } | 205 } // end of namespace dummy1 |
201 | 206 |
202 // Validate the enum values in ICMPX8632_TABLE. | 207 // Validate the enum values in ICMPX8632_TABLE. |
203 { | 208 namespace dummy2 { |
204 // Define a temporary set of enum values based on low-level | 209 // Define a temporary set of enum values based on low-level table |
205 // table entries. | 210 // entries. |
206 enum _tmp_enum { | 211 enum _tmp_enum { |
207 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, | 212 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, |
208 ICMPX8632_TABLE | 213 ICMPX8632_TABLE |
209 #undef X | 214 #undef X |
210 _num | 215 _num |
211 }; | 216 }; |
212 // Define a set of constants based on high-level table entries. | 217 // Define a set of constants based on high-level table entries. |
213 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | 218 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
214 ICEINSTICMP_TABLE; | 219 ICEINSTICMP_TABLE; |
215 #undef X | 220 #undef X |
216 // Define a set of constants based on low-level table entries, | 221 // Define a set of constants based on low-level table entries, and |
217 // and ensure the table entry keys are consistent. | 222 // ensure the table entry keys are consistent. |
218 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 223 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
219 static const int _table2_##val = _tmp_##val; \ | 224 static const int _table2_##val = _tmp_##val; \ |
220 STATIC_ASSERT(_table1_##val == _table2_##val); | 225 static_assert( \ |
221 ICMPX8632_TABLE; | 226 _table1_##val == _table2_##val, \ |
| 227 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); |
| 228 ICMPX8632_TABLE; |
222 #undef X | 229 #undef X |
223 // Repeat the static asserts with respect to the high-level | 230 // Repeat the static asserts with respect to the high-level table |
224 // table entries in case the high-level table has extra entries. | 231 // entries in case the high-level table has extra entries. |
225 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | 232 #define X(tag, str) \ |
226 ICEINSTICMP_TABLE; | 233 static_assert( \ |
| 234 _table1_##tag == _table2_##tag, \ |
| 235 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); |
| 236 ICEINSTICMP_TABLE; |
227 #undef X | 237 #undef X |
228 } | 238 } // end of namespace dummy2 |
229 | 239 |
230 // Validate the enum values in ICETYPEX8632_TABLE. | 240 // Validate the enum values in ICETYPEX8632_TABLE. |
231 { | 241 namespace dummy3 { |
232 // Define a temporary set of enum values based on low-level | 242 // Define a temporary set of enum values based on low-level table |
233 // table entries. | 243 // entries. |
234 enum _tmp_enum { | 244 enum _tmp_enum { |
235 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag, | 245 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag, |
236 ICETYPEX8632_TABLE | 246 ICETYPEX8632_TABLE |
237 #undef X | 247 #undef X |
238 _num | 248 _num |
239 }; | 249 }; |
240 // Define a set of constants based on high-level table entries. | 250 // Define a set of constants based on high-level table entries. |
241 #define X(tag, size, align, elts, elty, str) \ | 251 #define X(tag, size, align, elts, elty, str) \ |
242 static const int _table1_##tag = tag; | 252 static const int _table1_##tag = tag; |
243 ICETYPE_TABLE; | 253 ICETYPE_TABLE; |
244 #undef X | 254 #undef X |
245 // Define a set of constants based on low-level table entries, | 255 // Define a set of constants based on low-level table entries, and |
246 // and ensure the table entry keys are consistent. | 256 // ensure the table entry keys are consistent. |
247 #define X(tag, elementty, cvt, sdss, pack, width) \ | 257 #define X(tag, elementty, cvt, sdss, pack, width) \ |
248 static const int _table2_##tag = _tmp_##tag; \ | 258 static const int _table2_##tag = _tmp_##tag; \ |
249 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 259 static_assert(_table1_##tag == _table2_##tag, \ |
250 ICETYPEX8632_TABLE; | 260 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 261 ICETYPEX8632_TABLE; |
251 #undef X | 262 #undef X |
252 // Repeat the static asserts with respect to the high-level | 263 // Repeat the static asserts with respect to the high-level table |
253 // table entries in case the high-level table has extra entries. | 264 // entries in case the high-level table has extra entries. |
254 #define X(tag, size, align, elts, elty, str) \ | 265 #define X(tag, size, align, elts, elty, str) \ |
255 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 266 static_assert(_table1_##tag == _table2_##tag, \ |
256 ICETYPE_TABLE; | 267 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 268 ICETYPE_TABLE; |
257 #undef X | 269 #undef X |
258 } | 270 } // end of namespace dummy3 |
259 } | |
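A condensed, self-contained illustration of the cross-check pattern used in the dummy namespaces above; the table macros here (HIGH_TABLE, LOW_TABLE) are hypothetical, not the Subzero ones:

// Two x-macro tables that are supposed to list the same keys in the same
// order, kept in separate headers in the real code.
#define HIGH_TABLE \
  X(Add, "add")    \
  X(Sub, "sub")
#define LOW_TABLE  \
  X(Add, EmitAdd)  \
  X(Sub, EmitSub)

// High-level and low-level enums generated from each table.
enum HighOp {
#define X(tag, str) High_##tag,
  HIGH_TABLE
#undef X
};
enum LowOp {
#define X(tag, emit) Low_##tag,
  LOW_TABLE
#undef X
};

// If an entry is reordered, added, or removed in only one table, these
// static_asserts fail at compile time.
#define X(tag, emit) \
  static_assert((int)High_##tag == (int)Low_##tag, "tables out of sync");
LOW_TABLE
#undef X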
260 | 271 |
261 } // end of anonymous namespace | 272 } // end of anonymous namespace |
262 | 273 |
263 TargetX8632::TargetX8632(Cfg *Func) | 274 TargetX8632::TargetX8632(Cfg *Func) |
264 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 275 : TargetLowering(Func), InstructionSet(CLInstructionSet), |
265 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), | 276 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), |
266 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), | 277 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), |
267 PhysicalRegisters(VarList(RegX8632::Reg_NUM)) { | 278 PhysicalRegisters(VarList(RegX8632::Reg_NUM)) { |
268 // TODO: Don't initialize IntegerRegisters and friends every time. | 279 // TODO: Don't initialize IntegerRegisters and friends every time. |
269 // Instead, initialize in some sort of static initializer for the | 280 // Instead, initialize in some sort of static initializer for the |
(...skipping 2520 matching lines...)
2790 // | 2801 // |
2791 // insertelement into index 2 (result is stored in T): | 2802 // insertelement into index 2 (result is stored in T): |
2792 // T := SourceVectRM | 2803 // T := SourceVectRM |
2793 // ElementR := ElementR[0, 0] T[0, 3] | 2804 // ElementR := ElementR[0, 0] T[0, 3] |
2794 // T := T[0, 1] ElementR[0, 3] | 2805 // T := T[0, 1] ElementR[0, 3] |
2795 // | 2806 // |
2796 // insertelement into index 3 (result is stored in T): | 2807 // insertelement into index 3 (result is stored in T): |
2797 // T := SourceVectRM | 2808 // T := SourceVectRM |
2798 // ElementR := ElementR[0, 0] T[0, 2] | 2809 // ElementR := ElementR[0, 0] T[0, 2] |
2799 // T := T[0, 1] ElementR[3, 0] | 2810 // T := T[0, 1] ElementR[3, 0] |
2800 const unsigned char Mask1[3] = {0, 192, 128}; | 2811 const unsigned char Mask1[3] = { 0, 192, 128 }; |
2801 const unsigned char Mask2[3] = {227, 196, 52}; | 2812 const unsigned char Mask2[3] = { 227, 196, 52 }; |
2802 | 2813 |
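The Mask1/Mask2 bytes are shufps immediates: each immediate packs four 2-bit lane selectors, where result lanes 0-1 are taken from the first operand and lanes 2-3 from the second. A small stand-alone sketch (decodeShufpsImm is hypothetical, not Subzero code) that decodes an immediate this way:

#include <cstdint>
#include <cstdio>

// Print which source lane each result lane of shufps selects for a given
// immediate.  Lanes 0-1 of the result come from the first operand, lanes
// 2-3 from the second operand.
static void decodeShufpsImm(uint8_t Imm) {
  for (int Lane = 0; Lane < 4; ++Lane)
    std::printf("dest[%d] = %s[%u]\n", Lane, Lane < 2 ? "src1" : "src2",
                (Imm >> (2 * Lane)) & 3u);
}

int main() {
  decodeShufpsImm(196); // Mask2[1]: matches "T := T[0, 1] ElementR[0, 3]"
  decodeShufpsImm(52);  // Mask2[2]: matches "T := T[0, 1] ElementR[3, 0]"
  return 0;
}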
2803 Constant *Mask1Constant = | 2814 Constant *Mask1Constant = |
2804 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]); | 2815 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]); |
2805 Constant *Mask2Constant = | 2816 Constant *Mask2Constant = |
2806 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]); | 2817 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]); |
2807 | 2818 |
2808 if (Index == 1) { | 2819 if (Index == 1) { |
2809 _shufps(ElementR, SourceVectRM, Mask1Constant); | 2820 _shufps(ElementR, SourceVectRM, Mask1Constant); |
2810 _shufps(ElementR, SourceVectRM, Mask2Constant); | 2821 _shufps(ElementR, SourceVectRM, Mask2Constant); |
2811 _movp(Inst->getDest(), ElementR); | 2822 _movp(Inst->getDest(), ElementR); |
(...skipping 24 matching lines...)
2836 Variable *T = makeReg(Ty); | 2847 Variable *T = makeReg(Ty); |
2837 _movp(T, Slot); | 2848 _movp(T, Slot); |
2838 _movp(Inst->getDest(), T); | 2849 _movp(Inst->getDest(), T); |
2839 } | 2850 } |
2840 } | 2851 } |
2841 | 2852 |
2842 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 2853 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
2843 switch (Instr->getIntrinsicInfo().ID) { | 2854 switch (Instr->getIntrinsicInfo().ID) { |
2844 case Intrinsics::AtomicCmpxchg: { | 2855 case Intrinsics::AtomicCmpxchg: { |
2845 if (!Intrinsics::VerifyMemoryOrder( | 2856 if (!Intrinsics::VerifyMemoryOrder( |
2846 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) { | 2857 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) { |
2847 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); | 2858 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); |
2848 return; | 2859 return; |
2849 } | 2860 } |
2850 if (!Intrinsics::VerifyMemoryOrder( | 2861 if (!Intrinsics::VerifyMemoryOrder( |
2851 llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) { | 2862 llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) { |
2852 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); | 2863 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); |
2853 return; | 2864 return; |
2854 } | 2865 } |
2855 Variable *DestPrev = Instr->getDest(); | 2866 Variable *DestPrev = Instr->getDest(); |
2856 Operand *PtrToMem = Instr->getArg(0); | 2867 Operand *PtrToMem = Instr->getArg(0); |
2857 Operand *Expected = Instr->getArg(1); | 2868 Operand *Expected = Instr->getArg(1); |
2858 Operand *Desired = Instr->getArg(2); | 2869 Operand *Desired = Instr->getArg(2); |
2859 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) | 2870 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) |
2860 return; | 2871 return; |
2861 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); | 2872 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); |
2862 return; | 2873 return; |
2863 } | 2874 } |
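For context, the contract of the AtomicCmpxchg intrinsic being lowered here matches a C++11 strong compare-exchange; a minimal sketch of that contract (illustration only, using std::atomic rather than the emitted x86 code):

#include <atomic>
#include <cstdint>

// Returns the previous value of *PtrToMem, and stores Desired only if that
// previous value equaled Expected.
uint32_t atomicCmpxchg(std::atomic<uint32_t> *PtrToMem, uint32_t Expected,
                       uint32_t Desired) {
  PtrToMem->compare_exchange_strong(Expected, Desired);
  // On failure, compare_exchange_strong writes the observed value back into
  // Expected; on success, Expected already held it.  Either way this is the
  // previous value (DestPrev above).
  return Expected;
}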
2864 case Intrinsics::AtomicFence: | 2875 case Intrinsics::AtomicFence: |
2865 if (!Intrinsics::VerifyMemoryOrder( | 2876 if (!Intrinsics::VerifyMemoryOrder( |
2866 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) { | 2877 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) { |
2867 Func->setError("Unexpected memory ordering for AtomicFence"); | 2878 Func->setError("Unexpected memory ordering for AtomicFence"); |
2868 return; | 2879 return; |
2869 } | 2880 } |
2870 _mfence(); | 2881 _mfence(); |
2871 return; | 2882 return; |
2872 case Intrinsics::AtomicFenceAll: | 2883 case Intrinsics::AtomicFenceAll: |
2873 // NOTE: FenceAll should prevent any load/store from being moved | 2884 // NOTE: FenceAll should prevent any load/store from being moved |
2874 // across the fence (both atomic and non-atomic). The InstX8632Mfence | 2885 // across the fence (both atomic and non-atomic). The InstX8632Mfence |
2875 // instruction is currently marked coarsely as "HasSideEffects". | 2886 // instruction is currently marked coarsely as "HasSideEffects". |
2876 _mfence(); | 2887 _mfence(); |
(...skipping 25 matching lines...)
2902 return; | 2913 return; |
2903 } | 2914 } |
2904 // The PNaCl ABI requires the byte size to be a compile-time constant. | 2915 // The PNaCl ABI requires the byte size to be a compile-time constant. |
2905 Func->setError("AtomicIsLockFree byte size should be compile-time const"); | 2916 Func->setError("AtomicIsLockFree byte size should be compile-time const"); |
2906 return; | 2917 return; |
2907 } | 2918 } |
2908 case Intrinsics::AtomicLoad: { | 2919 case Intrinsics::AtomicLoad: { |
2909 // We require the memory address to be naturally aligned. | 2920 // We require the memory address to be naturally aligned. |
2910 // Given that this is the case, normal loads are atomic. | 2921 // Given that this is the case, normal loads are atomic. |
2911 if (!Intrinsics::VerifyMemoryOrder( | 2922 if (!Intrinsics::VerifyMemoryOrder( |
2912 llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) { | 2923 llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) { |
2913 Func->setError("Unexpected memory ordering for AtomicLoad"); | 2924 Func->setError("Unexpected memory ordering for AtomicLoad"); |
2914 return; | 2925 return; |
2915 } | 2926 } |
2916 Variable *Dest = Instr->getDest(); | 2927 Variable *Dest = Instr->getDest(); |
2917 if (Dest->getType() == IceType_i64) { | 2928 if (Dest->getType() == IceType_i64) { |
2918 // Follow what GCC does and use a movq instead of what lowerLoad() | 2929 // Follow what GCC does and use a movq instead of what lowerLoad() |
2919 // normally does (split the load into two). | 2930 // normally does (split the load into two). |
2920 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding | 2931 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding |
2921 // can't happen anyway, since this is x86-32 and integer arithmetic only | 2932 // can't happen anyway, since this is x86-32 and integer arithmetic only |
2922 // happens on 32-bit quantities. | 2933 // happens on 32-bit quantities. |
(...skipping 12 matching lines...)
2935 lowerLoad(Load); | 2946 lowerLoad(Load); |
2936 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 2947 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
2937 // Since lowerLoad may fuse the load w/ an arithmetic instruction, | 2948 // Since lowerLoad may fuse the load w/ an arithmetic instruction, |
2938 // insert the FakeUse on the last-inserted instruction's dest. | 2949 // insert the FakeUse on the last-inserted instruction's dest. |
2939 Context.insert( | 2950 Context.insert( |
2940 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 2951 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
2941 return; | 2952 return; |
2942 } | 2953 } |
2943 case Intrinsics::AtomicRMW: | 2954 case Intrinsics::AtomicRMW: |
2944 if (!Intrinsics::VerifyMemoryOrder( | 2955 if (!Intrinsics::VerifyMemoryOrder( |
2945 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) { | 2956 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) { |
2946 Func->setError("Unexpected memory ordering for AtomicRMW"); | 2957 Func->setError("Unexpected memory ordering for AtomicRMW"); |
2947 return; | 2958 return; |
2948 } | 2959 } |
2949 lowerAtomicRMW(Instr->getDest(), | 2960 lowerAtomicRMW(Instr->getDest(), |
2950 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( | 2961 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( |
2951 Instr->getArg(0))->getValue()), | 2962 Instr->getArg(0))->getValue()), |
2952 Instr->getArg(1), Instr->getArg(2)); | 2963 Instr->getArg(1), Instr->getArg(2)); |
2953 return; | 2964 return; |
2954 case Intrinsics::AtomicStore: { | 2965 case Intrinsics::AtomicStore: { |
2955 if (!Intrinsics::VerifyMemoryOrder( | 2966 if (!Intrinsics::VerifyMemoryOrder( |
2956 llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) { | 2967 llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) { |
2957 Func->setError("Unexpected memory ordering for AtomicStore"); | 2968 Func->setError("Unexpected memory ordering for AtomicStore"); |
2958 return; | 2969 return; |
2959 } | 2970 } |
2960 // We require the memory address to be naturally aligned. | 2971 // We require the memory address to be naturally aligned. |
2961 // Given that this is the case, normal stores are atomic. | 2972 // Given that this is the case, normal stores are atomic. |
2962 // Add a fence after the store to make it visible. | 2973 // Add a fence after the store to make it visible. |
2963 Operand *Value = Instr->getArg(0); | 2974 Operand *Value = Instr->getArg(0); |
2964 Operand *Ptr = Instr->getArg(1); | 2975 Operand *Ptr = Instr->getArg(1); |
2965 if (Value->getType() == IceType_i64) { | 2976 if (Value->getType() == IceType_i64) { |
2966 // Use a movq instead of what lowerStore() normally does | 2977 // Use a movq instead of what lowerStore() normally does |
(...skipping 133 matching lines...)
3100 InstCall *Call = makeHelperCall("memset", NULL, 3); | 3111 InstCall *Call = makeHelperCall("memset", NULL, 3); |
3101 Call->addArg(Instr->getArg(0)); | 3112 Call->addArg(Instr->getArg(0)); |
3102 Call->addArg(ValExt); | 3113 Call->addArg(ValExt); |
3103 Call->addArg(Instr->getArg(2)); | 3114 Call->addArg(Instr->getArg(2)); |
3104 lowerCall(Call); | 3115 lowerCall(Call); |
3105 return; | 3116 return; |
3106 } | 3117 } |
3107 case Intrinsics::NaClReadTP: { | 3118 case Intrinsics::NaClReadTP: { |
3108 if (Ctx->getFlags().UseSandboxing) { | 3119 if (Ctx->getFlags().UseSandboxing) { |
3109 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 3120 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
3110 Operand *Src = | 3121 Operand *Src = OperandX8632Mem::create( |
3111 OperandX8632Mem::create(Func, IceType_i32, NULL, Zero, NULL, | 3122 Func, IceType_i32, NULL, Zero, NULL, 0, OperandX8632Mem::SegReg_GS); |
3112 0, OperandX8632Mem::SegReg_GS); | |
3113 Variable *Dest = Instr->getDest(); | 3123 Variable *Dest = Instr->getDest(); |
3114 Variable *T = NULL; | 3124 Variable *T = NULL; |
3115 _mov(T, Src); | 3125 _mov(T, Src); |
3116 _mov(Dest, T); | 3126 _mov(Dest, T); |
3117 } else { | 3127 } else { |
3118 InstCall *Call = makeHelperCall("__nacl_read_tp", Instr->getDest(), 0); | 3128 InstCall *Call = makeHelperCall("__nacl_read_tp", Instr->getDest(), 0); |
3119 lowerCall(Call); | 3129 lowerCall(Call); |
3120 } | 3130 } |
3121 return; | 3131 return; |
3122 } | 3132 } |
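Under sandboxing, the sequence above is a single %gs-relative load of the thread pointer; expressed as GCC inline assembly it would look roughly like this (a sketch assuming the NaCl convention, visible in the code above, that the thread pointer lives at %gs:0; naclReadTP is hypothetical):

// Hypothetical illustration, not Subzero code: read the thread pointer from
// offset 0 in the GS segment.
static inline void *naclReadTP() {
  void *TP;
  asm volatile("movl %%gs:0, %0" : "=r"(TP));
  return TP;
}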
(...skipping 771 matching lines...)
3894 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3904 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
3895 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); | 3905 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); |
3896 _movp(xmm0, ConditionRM); | 3906 _movp(xmm0, ConditionRM); |
3897 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); | 3907 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); |
3898 _movp(T, SrcFRM); | 3908 _movp(T, SrcFRM); |
3899 _blendvps(T, SrcTRM, xmm0); | 3909 _blendvps(T, SrcTRM, xmm0); |
3900 _movp(Dest, T); | 3910 _movp(Dest, T); |
3901 } else { | 3911 } else { |
3902 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 3912 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
3903 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | 3913 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
3904 : IceType_v16i8; | 3914 : IceType_v16i8; |
3905 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); | 3915 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); |
3906 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 3916 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
3907 _movp(T, SrcFRM); | 3917 _movp(T, SrcFRM); |
3908 _pblendvb(T, SrcTRM, xmm0); | 3918 _pblendvb(T, SrcTRM, xmm0); |
3909 _movp(Dest, T); | 3919 _movp(Dest, T); |
3910 } | 3920 } |
3911 return; | 3921 return; |
3912 } | 3922 } |
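An equivalent of the SSE4.1 path above written with compiler intrinsics rather than Subzero's instruction builders (a sketch for illustration; selectV4f32 is hypothetical and assumes each condition lane holds 0 or 1):

#include <emmintrin.h> // SSE2: _mm_slli_epi32, _mm_castsi128_ps
#include <smmintrin.h> // SSE4.1: _mm_blendv_ps

// Select SrcT where the condition lane is 1, SrcF otherwise.  The shift by
// 31 moves the 0/1 condition bit into the sign bit, which is what blendvps
// keys off for each lane of the mask.
static __m128 selectV4f32(__m128i Cond01, __m128 SrcT, __m128 SrcF) {
  __m128i Mask = _mm_slli_epi32(Cond01, 31);
  return _mm_blendv_ps(SrcF, SrcT, _mm_castsi128_ps(Mask));
}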
3913 // Lower select without SSE4.1: | 3923 // Lower select without SSE4.1: |
3914 // a=d?b:c ==> | 3924 // a=d?b:c ==> |
(...skipping 611 matching lines...)
4526 Str << "\t.align\t" << Align << "\n"; | 4536 Str << "\t.align\t" << Align << "\n"; |
4527 Str << MangledName << ":\n"; | 4537 Str << MangledName << ":\n"; |
4528 for (SizeT i = 0; i < Size; ++i) { | 4538 for (SizeT i = 0; i < Size; ++i) { |
4529 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4539 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
4530 } | 4540 } |
4531 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4541 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4532 } | 4542 } |
4533 } | 4543 } |
4534 | 4544 |
4535 } // end of namespace Ice | 4545 } // end of namespace Ice |