| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 53 const struct TableFcmp_ { | 53 const struct TableFcmp_ { |
| 54 uint32_t Default; | 54 uint32_t Default; |
| 55 bool SwapScalarOperands; | 55 bool SwapScalarOperands; |
| 56 CondX86::BrCond C1, C2; | 56 CondX86::BrCond C1, C2; |
| 57 bool SwapVectorOperands; | 57 bool SwapVectorOperands; |
| 58 CondX86::CmppsCond Predicate; | 58 CondX86::CmppsCond Predicate; |
| 59 } TableFcmp[] = { | 59 } TableFcmp[] = { |
| 60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | 60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
| 61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \ | 61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \ |
| 62 , | 62 , |
| 63 FCMPX8632_TABLE | 63 FCMPX8632_TABLE |
| 64 #undef X | 64 #undef X |
| 65 }; | 65 }; |
| 66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); | 66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); |
| 67 | 67 |
| 68 // The following table summarizes the logic for lowering the icmp instruction | 68 // The following table summarizes the logic for lowering the icmp instruction |
| 69 // for i32 and narrower types. Each icmp condition has a clear mapping to an | 69 // for i32 and narrower types. Each icmp condition has a clear mapping to an |
| 70 // x86 conditional branch instruction. | 70 // x86 conditional branch instruction. |
| 71 | 71 |
| 72 const struct TableIcmp32_ { | 72 const struct TableIcmp32_ { |
| 73 CondX86::BrCond Mapping; | 73 CondX86::BrCond Mapping; |
| 74 } TableIcmp32[] = { | 74 } TableIcmp32[] = { |
| 75 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 75 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 147 } | 147 } |
| 148 | 148 |
| 149 // Value is in bytes. Return Value adjusted to the next highest multiple | 149 // Value is in bytes. Return Value adjusted to the next highest multiple |
| 150 // of the stack alignment. | 150 // of the stack alignment. |
| 151 uint32_t applyStackAlignment(uint32_t Value) { | 151 uint32_t applyStackAlignment(uint32_t Value) { |
| 152 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 152 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
| 153 } | 153 } |
| 154 | 154 |
| 155 // Instruction set options | 155 // Instruction set options |
| 156 namespace cl = ::llvm::cl; | 156 namespace cl = ::llvm::cl; |
| 157 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | 157 cl::opt<TargetX8632::X86InstructionSet> |
| 158 "mattr", cl::desc("X86 target attributes"), | 158 CLInstructionSet("mattr", cl::desc("X86 target attributes"), |
| 159 cl::init(TargetX8632::SSE2), | 159 cl::init(TargetX8632::SSE2), |
| 160 cl::values( | 160 cl::values(clEnumValN(TargetX8632::SSE2, "sse2", |
| 161 clEnumValN(TargetX8632::SSE2, "sse2", | 161 "Enable SSE2 instructions (default)"), |
| 162 "Enable SSE2 instructions (default)"), | 162 clEnumValN(TargetX8632::SSE4_1, "sse4.1", |
| 163 clEnumValN(TargetX8632::SSE4_1, "sse4.1", | 163 "Enable SSE 4.1 instructions"), |
| 164 "Enable SSE 4.1 instructions"), clEnumValEnd)); | 164 clEnumValEnd)); |
| 165 | 165 |
| 166 // In some cases, there are x-macro tables for both high-level and | 166 // In some cases, there are x-macro tables for both high-level and |
| 167 // low-level instructions/operands that use the same enum key value. | 167 // low-level instructions/operands that use the same enum key value. |
| 168 // The tables are kept separate to maintain a proper separation | 168 // The tables are kept separate to maintain a proper separation |
| 169 // between abstraction layers. There is a risk that the tables | 169 // between abstraction layers. There is a risk that the tables could |
| 170 // could get out of sync if enum values are reordered or if entries | 170 // get out of sync if enum values are reordered or if entries are |
| 171 // are added or deleted. This dummy function uses static_assert to | 171 // added or deleted. The following dummy namespaces use |
| 172 // ensure everything is kept in sync. | 172 // static_asserts to ensure everything is kept in sync. |
| 173 void __attribute__((unused)) xMacroIntegrityCheck() { | 173 |
| 174 // Validate the enum values in FCMPX8632_TABLE. | 174 // Validate the enum values in FCMPX8632_TABLE. |
| 175 { | 175 namespace dummy1 { |
| 176 // Define a temporary set of enum values based on low-level | 176 // Define a temporary set of enum values based on low-level table |
| 177 // table entries. | 177 // entries. |
| 178 enum _tmp_enum { | 178 enum _tmp_enum { |
| 179 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, | 179 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, |
| 180 FCMPX8632_TABLE | 180 FCMPX8632_TABLE |
| 181 #undef X | 181 #undef X |
| 182 _num | 182 _num |
| 183 }; | 183 }; |
| 184 // Define a set of constants based on high-level table entries. | 184 // Define a set of constants based on high-level table entries. |
| 185 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; | 185 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; |
| 186 ICEINSTFCMP_TABLE; | 186 ICEINSTFCMP_TABLE; |
| 187 #undef X | 187 #undef X |
| 188 // Define a set of constants based on low-level table entries, | 188 // Define a set of constants based on low-level table entries, and |
| 189 // and ensure the table entry keys are consistent. | 189 // ensure the table entry keys are consistent. |
| 190 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | 190 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
| 191 static const int _table2_##val = _tmp_##val; \ | 191 static const int _table2_##val = _tmp_##val; \ |
| 192 STATIC_ASSERT(_table1_##val == _table2_##val); | 192 static_assert( \ |
| 193 FCMPX8632_TABLE; | 193 _table1_##val == _table2_##val, \ |
| 194 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); |
| 195 FCMPX8632_TABLE; |
| 194 #undef X | 196 #undef X |
| 195 // Repeat the static asserts with respect to the high-level | 197 // Repeat the static asserts with respect to the high-level table |
| 196 // table entries in case the high-level table has extra entries. | 198 // entries in case the high-level table has extra entries. |
| 197 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | 199 #define X(tag, str) \ |
| 198 ICEINSTFCMP_TABLE; | 200 static_assert( \ |
| 201 _table1_##tag == _table2_##tag, \ |
| 202 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); |
| 203 ICEINSTFCMP_TABLE; |
| 199 #undef X | 204 #undef X |
| 200 } | 205 } // end of namespace dummy1 |
| 201 | 206 |
| 202 // Validate the enum values in ICMPX8632_TABLE. | 207 // Validate the enum values in ICMPX8632_TABLE. |
| 203 { | 208 namespace dummy2 { |
| 204 // Define a temporary set of enum values based on low-level | 209 // Define a temporary set of enum values based on low-level table |
| 205 // table entries. | 210 // entries. |
| 206 enum _tmp_enum { | 211 enum _tmp_enum { |
| 207 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, | 212 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, |
| 208 ICMPX8632_TABLE | 213 ICMPX8632_TABLE |
| 209 #undef X | 214 #undef X |
| 210 _num | 215 _num |
| 211 }; | 216 }; |
| 212 // Define a set of constants based on high-level table entries. | 217 // Define a set of constants based on high-level table entries. |
| 213 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | 218 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
| 214 ICEINSTICMP_TABLE; | 219 ICEINSTICMP_TABLE; |
| 215 #undef X | 220 #undef X |
| 216 // Define a set of constants based on low-level table entries, | 221 // Define a set of constants based on low-level table entries, and |
| 217 // and ensure the table entry keys are consistent. | 222 // ensure the table entry keys are consistent. |
| 218 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 223 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
| 219 static const int _table2_##val = _tmp_##val; \ | 224 static const int _table2_##val = _tmp_##val; \ |
| 220 STATIC_ASSERT(_table1_##val == _table2_##val); | 225 static_assert( \ |
| 221 ICMPX8632_TABLE; | 226 _table1_##val == _table2_##val, \ |
| 227 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); |
| 228 ICMPX8632_TABLE; |
| 222 #undef X | 229 #undef X |
| 223 // Repeat the static asserts with respect to the high-level | 230 // Repeat the static asserts with respect to the high-level table |
| 224 // table entries in case the high-level table has extra entries. | 231 // entries in case the high-level table has extra entries. |
| 225 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | 232 #define X(tag, str) \ |
| 226 ICEINSTICMP_TABLE; | 233 static_assert( \ |
| 234 _table1_##tag == _table2_##tag, \ |
| 235 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); |
| 236 ICEINSTICMP_TABLE; |
| 227 #undef X | 237 #undef X |
| 228 } | 238 } // end of namespace dummy2 |
| 229 | 239 |
| 230 // Validate the enum values in ICETYPEX8632_TABLE. | 240 // Validate the enum values in ICETYPEX8632_TABLE. |
| 231 { | 241 namespace dummy3 { |
| 232 // Define a temporary set of enum values based on low-level | 242 // Define a temporary set of enum values based on low-level table |
| 233 // table entries. | 243 // entries. |
| 234 enum _tmp_enum { | 244 enum _tmp_enum { |
| 235 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag, | 245 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag, |
| 236 ICETYPEX8632_TABLE | 246 ICETYPEX8632_TABLE |
| 237 #undef X | 247 #undef X |
| 238 _num | 248 _num |
| 239 }; | 249 }; |
| 240 // Define a set of constants based on high-level table entries. | 250 // Define a set of constants based on high-level table entries. |
| 241 #define X(tag, size, align, elts, elty, str) \ | 251 #define X(tag, size, align, elts, elty, str) \ |
| 242 static const int _table1_##tag = tag; | 252 static const int _table1_##tag = tag; |
| 243 ICETYPE_TABLE; | 253 ICETYPE_TABLE; |
| 244 #undef X | 254 #undef X |
| 245 // Define a set of constants based on low-level table entries, | 255 // Define a set of constants based on low-level table entries, and |
| 246 // and ensure the table entry keys are consistent. | 256 // ensure the table entry keys are consistent. |
| 247 #define X(tag, elementty, cvt, sdss, pack, width) \ | 257 #define X(tag, elementty, cvt, sdss, pack, width) \ |
| 248 static const int _table2_##tag = _tmp_##tag; \ | 258 static const int _table2_##tag = _tmp_##tag; \ |
| 249 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 259 static_assert(_table1_##tag == _table2_##tag, \ |
| 250 ICETYPEX8632_TABLE; | 260 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 261 ICETYPEX8632_TABLE; |
| 251 #undef X | 262 #undef X |
| 252 // Repeat the static asserts with respect to the high-level | 263 // Repeat the static asserts with respect to the high-level table |
| 253 // table entries in case the high-level table has extra entries. | 264 // entries in case the high-level table has extra entries. |
| 254 #define X(tag, size, align, elts, elty, str) \ | 265 #define X(tag, size, align, elts, elty, str) \ |
| 255 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 266 static_assert(_table1_##tag == _table2_##tag, \ |
| 256 ICETYPE_TABLE; | 267 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 268 ICETYPE_TABLE; |
| 257 #undef X | 269 #undef X |
| 258 } | 270 } // end of namespace dummy3 |
| 259 } | |
| 260 | 271 |
| 261 } // end of anonymous namespace | 272 } // end of anonymous namespace |
| 262 | 273 |
| 263 TargetX8632::TargetX8632(Cfg *Func) | 274 TargetX8632::TargetX8632(Cfg *Func) |
| 264 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 275 : TargetLowering(Func), InstructionSet(CLInstructionSet), |
| 265 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), | 276 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), |
| 266 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), | 277 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), |
| 267 PhysicalRegisters(VarList(RegX8632::Reg_NUM)) { | 278 PhysicalRegisters(VarList(RegX8632::Reg_NUM)) { |
| 268 // TODO: Don't initialize IntegerRegisters and friends every time. | 279 // TODO: Don't initialize IntegerRegisters and friends every time. |
| 269 // Instead, initialize in some sort of static initializer for the | 280 // Instead, initialize in some sort of static initializer for the |
| (...skipping 2520 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2790 // | 2801 // |
| 2791 // insertelement into index 2 (result is stored in T): | 2802 // insertelement into index 2 (result is stored in T): |
| 2792 // T := SourceVectRM | 2803 // T := SourceVectRM |
| 2793 // ElementR := ElementR[0, 0] T[0, 3] | 2804 // ElementR := ElementR[0, 0] T[0, 3] |
| 2794 // T := T[0, 1] ElementR[0, 3] | 2805 // T := T[0, 1] ElementR[0, 3] |
| 2795 // | 2806 // |
| 2796 // insertelement into index 3 (result is stored in T): | 2807 // insertelement into index 3 (result is stored in T): |
| 2797 // T := SourceVectRM | 2808 // T := SourceVectRM |
| 2798 // ElementR := ElementR[0, 0] T[0, 2] | 2809 // ElementR := ElementR[0, 0] T[0, 2] |
| 2799 // T := T[0, 1] ElementR[3, 0] | 2810 // T := T[0, 1] ElementR[3, 0] |
| 2800 const unsigned char Mask1[3] = {0, 192, 128}; | 2811 const unsigned char Mask1[3] = { 0, 192, 128 }; |
| 2801 const unsigned char Mask2[3] = {227, 196, 52}; | 2812 const unsigned char Mask2[3] = { 227, 196, 52 }; |
| 2802 | 2813 |
| 2803 Constant *Mask1Constant = | 2814 Constant *Mask1Constant = |
| 2804 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]); | 2815 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]); |
| 2805 Constant *Mask2Constant = | 2816 Constant *Mask2Constant = |
| 2806 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]); | 2817 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]); |
| 2807 | 2818 |
| 2808 if (Index == 1) { | 2819 if (Index == 1) { |
| 2809 _shufps(ElementR, SourceVectRM, Mask1Constant); | 2820 _shufps(ElementR, SourceVectRM, Mask1Constant); |
| 2810 _shufps(ElementR, SourceVectRM, Mask2Constant); | 2821 _shufps(ElementR, SourceVectRM, Mask2Constant); |
| 2811 _movp(Inst->getDest(), ElementR); | 2822 _movp(Inst->getDest(), ElementR); |
| (...skipping 24 matching lines...) Expand all Loading... |
| 2836 Variable *T = makeReg(Ty); | 2847 Variable *T = makeReg(Ty); |
| 2837 _movp(T, Slot); | 2848 _movp(T, Slot); |
| 2838 _movp(Inst->getDest(), T); | 2849 _movp(Inst->getDest(), T); |
| 2839 } | 2850 } |
| 2840 } | 2851 } |
| 2841 | 2852 |
| 2842 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 2853 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| 2843 switch (Instr->getIntrinsicInfo().ID) { | 2854 switch (Instr->getIntrinsicInfo().ID) { |
| 2844 case Intrinsics::AtomicCmpxchg: { | 2855 case Intrinsics::AtomicCmpxchg: { |
| 2845 if (!Intrinsics::VerifyMemoryOrder( | 2856 if (!Intrinsics::VerifyMemoryOrder( |
| 2846 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) { | 2857 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) { |
| 2847 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); | 2858 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); |
| 2848 return; | 2859 return; |
| 2849 } | 2860 } |
| 2850 if (!Intrinsics::VerifyMemoryOrder( | 2861 if (!Intrinsics::VerifyMemoryOrder( |
| 2851 llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) { | 2862 llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) { |
| 2852 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); | 2863 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); |
| 2853 return; | 2864 return; |
| 2854 } | 2865 } |
| 2855 Variable *DestPrev = Instr->getDest(); | 2866 Variable *DestPrev = Instr->getDest(); |
| 2856 Operand *PtrToMem = Instr->getArg(0); | 2867 Operand *PtrToMem = Instr->getArg(0); |
| 2857 Operand *Expected = Instr->getArg(1); | 2868 Operand *Expected = Instr->getArg(1); |
| 2858 Operand *Desired = Instr->getArg(2); | 2869 Operand *Desired = Instr->getArg(2); |
| 2859 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) | 2870 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) |
| 2860 return; | 2871 return; |
| 2861 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); | 2872 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); |
| 2862 return; | 2873 return; |
| 2863 } | 2874 } |
| 2864 case Intrinsics::AtomicFence: | 2875 case Intrinsics::AtomicFence: |
| 2865 if (!Intrinsics::VerifyMemoryOrder( | 2876 if (!Intrinsics::VerifyMemoryOrder( |
| 2866 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) { | 2877 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) { |
| 2867 Func->setError("Unexpected memory ordering for AtomicFence"); | 2878 Func->setError("Unexpected memory ordering for AtomicFence"); |
| 2868 return; | 2879 return; |
| 2869 } | 2880 } |
| 2870 _mfence(); | 2881 _mfence(); |
| 2871 return; | 2882 return; |
| 2872 case Intrinsics::AtomicFenceAll: | 2883 case Intrinsics::AtomicFenceAll: |
| 2873 // NOTE: FenceAll should prevent any load/store from being moved | 2884 // NOTE: FenceAll should prevent any load/store from being moved |
| 2874 // across the fence (both atomic and non-atomic). The InstX8632Mfence | 2885 // across the fence (both atomic and non-atomic). The InstX8632Mfence |
| 2875 // instruction is currently marked coarsely as "HasSideEffects". | 2886 // instruction is currently marked coarsely as "HasSideEffects". |
| 2876 _mfence(); | 2887 _mfence(); |
| (...skipping 25 matching lines...) Expand all Loading... |
| 2902 return; | 2913 return; |
| 2903 } | 2914 } |
| 2904 // The PNaCl ABI requires the byte size to be a compile-time constant. | 2915 // The PNaCl ABI requires the byte size to be a compile-time constant. |
| 2905 Func->setError("AtomicIsLockFree byte size should be compile-time const"); | 2916 Func->setError("AtomicIsLockFree byte size should be compile-time const"); |
| 2906 return; | 2917 return; |
| 2907 } | 2918 } |
| 2908 case Intrinsics::AtomicLoad: { | 2919 case Intrinsics::AtomicLoad: { |
| 2909 // We require the memory address to be naturally aligned. | 2920 // We require the memory address to be naturally aligned. |
| 2910 // Given that is the case, then normal loads are atomic. | 2921 // Given that is the case, then normal loads are atomic. |
| 2911 if (!Intrinsics::VerifyMemoryOrder( | 2922 if (!Intrinsics::VerifyMemoryOrder( |
| 2912 llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) { | 2923 llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) { |
| 2913 Func->setError("Unexpected memory ordering for AtomicLoad"); | 2924 Func->setError("Unexpected memory ordering for AtomicLoad"); |
| 2914 return; | 2925 return; |
| 2915 } | 2926 } |
| 2916 Variable *Dest = Instr->getDest(); | 2927 Variable *Dest = Instr->getDest(); |
| 2917 if (Dest->getType() == IceType_i64) { | 2928 if (Dest->getType() == IceType_i64) { |
| 2918 // Follow what GCC does and use a movq instead of what lowerLoad() | 2929 // Follow what GCC does and use a movq instead of what lowerLoad() |
| 2919 // normally does (split the load into two). | 2930 // normally does (split the load into two). |
| 2920 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding | 2931 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding |
| 2921 // can't happen anyway, since this is x86-32 and integer arithmetic only | 2932 // can't happen anyway, since this is x86-32 and integer arithmetic only |
| 2922 // happens on 32-bit quantities. | 2933 // happens on 32-bit quantities. |
| (...skipping 12 matching lines...) Expand all Loading... |
| 2935 lowerLoad(Load); | 2946 lowerLoad(Load); |
| 2936 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 2947 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
| 2937 // Since lowerLoad may fuse the load w/ an arithmetic instruction, | 2948 // Since lowerLoad may fuse the load w/ an arithmetic instruction, |
| 2938 // insert the FakeUse on the last-inserted instruction's dest. | 2949 // insert the FakeUse on the last-inserted instruction's dest. |
| 2939 Context.insert( | 2950 Context.insert( |
| 2940 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 2951 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| 2941 return; | 2952 return; |
| 2942 } | 2953 } |
| 2943 case Intrinsics::AtomicRMW: | 2954 case Intrinsics::AtomicRMW: |
| 2944 if (!Intrinsics::VerifyMemoryOrder( | 2955 if (!Intrinsics::VerifyMemoryOrder( |
| 2945 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) { | 2956 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) { |
| 2946 Func->setError("Unexpected memory ordering for AtomicRMW"); | 2957 Func->setError("Unexpected memory ordering for AtomicRMW"); |
| 2947 return; | 2958 return; |
| 2948 } | 2959 } |
| 2949 lowerAtomicRMW(Instr->getDest(), | 2960 lowerAtomicRMW(Instr->getDest(), |
| 2950 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( | 2961 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( |
| 2951 Instr->getArg(0))->getValue()), | 2962 Instr->getArg(0))->getValue()), |
| 2952 Instr->getArg(1), Instr->getArg(2)); | 2963 Instr->getArg(1), Instr->getArg(2)); |
| 2953 return; | 2964 return; |
| 2954 case Intrinsics::AtomicStore: { | 2965 case Intrinsics::AtomicStore: { |
| 2955 if (!Intrinsics::VerifyMemoryOrder( | 2966 if (!Intrinsics::VerifyMemoryOrder( |
| 2956 llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) { | 2967 llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) { |
| 2957 Func->setError("Unexpected memory ordering for AtomicStore"); | 2968 Func->setError("Unexpected memory ordering for AtomicStore"); |
| 2958 return; | 2969 return; |
| 2959 } | 2970 } |
| 2960 // We require the memory address to be naturally aligned. | 2971 // We require the memory address to be naturally aligned. |
| 2961 // Given that is the case, then normal stores are atomic. | 2972 // Given that is the case, then normal stores are atomic. |
| 2962 // Add a fence after the store to make it visible. | 2973 // Add a fence after the store to make it visible. |
| 2963 Operand *Value = Instr->getArg(0); | 2974 Operand *Value = Instr->getArg(0); |
| 2964 Operand *Ptr = Instr->getArg(1); | 2975 Operand *Ptr = Instr->getArg(1); |
| 2965 if (Value->getType() == IceType_i64) { | 2976 if (Value->getType() == IceType_i64) { |
| 2966 // Use a movq instead of what lowerStore() normally does | 2977 // Use a movq instead of what lowerStore() normally does |
| (...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3100 InstCall *Call = makeHelperCall("memset", NULL, 3); | 3111 InstCall *Call = makeHelperCall("memset", NULL, 3); |
| 3101 Call->addArg(Instr->getArg(0)); | 3112 Call->addArg(Instr->getArg(0)); |
| 3102 Call->addArg(ValExt); | 3113 Call->addArg(ValExt); |
| 3103 Call->addArg(Instr->getArg(2)); | 3114 Call->addArg(Instr->getArg(2)); |
| 3104 lowerCall(Call); | 3115 lowerCall(Call); |
| 3105 return; | 3116 return; |
| 3106 } | 3117 } |
| 3107 case Intrinsics::NaClReadTP: { | 3118 case Intrinsics::NaClReadTP: { |
| 3108 if (Ctx->getFlags().UseSandboxing) { | 3119 if (Ctx->getFlags().UseSandboxing) { |
| 3109 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 3120 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 3110 Operand *Src = | 3121 Operand *Src = OperandX8632Mem::create( |
| 3111 OperandX8632Mem::create(Func, IceType_i32, NULL, Zero, NULL, | 3122 Func, IceType_i32, NULL, Zero, NULL, 0, OperandX8632Mem::SegReg_GS); |
| 3112 0, OperandX8632Mem::SegReg_GS); | |
| 3113 Variable *Dest = Instr->getDest(); | 3123 Variable *Dest = Instr->getDest(); |
| 3114 Variable *T = NULL; | 3124 Variable *T = NULL; |
| 3115 _mov(T, Src); | 3125 _mov(T, Src); |
| 3116 _mov(Dest, T); | 3126 _mov(Dest, T); |
| 3117 } else { | 3127 } else { |
| 3118 InstCall *Call = makeHelperCall("__nacl_read_tp", Instr->getDest(), 0); | 3128 InstCall *Call = makeHelperCall("__nacl_read_tp", Instr->getDest(), 0); |
| 3119 lowerCall(Call); | 3129 lowerCall(Call); |
| 3120 } | 3130 } |
| 3121 return; | 3131 return; |
| 3122 } | 3132 } |
| (...skipping 771 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3894 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3904 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 3895 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); | 3905 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); |
| 3896 _movp(xmm0, ConditionRM); | 3906 _movp(xmm0, ConditionRM); |
| 3897 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); | 3907 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); |
| 3898 _movp(T, SrcFRM); | 3908 _movp(T, SrcFRM); |
| 3899 _blendvps(T, SrcTRM, xmm0); | 3909 _blendvps(T, SrcTRM, xmm0); |
| 3900 _movp(Dest, T); | 3910 _movp(Dest, T); |
| 3901 } else { | 3911 } else { |
| 3902 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 3912 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
| 3903 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | 3913 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
| 3904 : IceType_v16i8; | 3914 : IceType_v16i8; |
| 3905 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); | 3915 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); |
| 3906 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 3916 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
| 3907 _movp(T, SrcFRM); | 3917 _movp(T, SrcFRM); |
| 3908 _pblendvb(T, SrcTRM, xmm0); | 3918 _pblendvb(T, SrcTRM, xmm0); |
| 3909 _movp(Dest, T); | 3919 _movp(Dest, T); |
| 3910 } | 3920 } |
| 3911 return; | 3921 return; |
| 3912 } | 3922 } |
| 3913 // Lower select without SSE4.1: | 3923 // Lower select without SSE4.1: |
| 3914 // a=d?b:c ==> | 3924 // a=d?b:c ==> |
| (...skipping 611 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4526 Str << "\t.align\t" << Align << "\n"; | 4536 Str << "\t.align\t" << Align << "\n"; |
| 4527 Str << MangledName << ":\n"; | 4537 Str << MangledName << ":\n"; |
| 4528 for (SizeT i = 0; i < Size; ++i) { | 4538 for (SizeT i = 0; i < Size; ++i) { |
| 4529 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4539 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| 4530 } | 4540 } |
| 4531 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4541 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| 4532 } | 4542 } |
| 4533 } | 4543 } |
| 4534 | 4544 |
| 4535 } // end of namespace Ice | 4545 } // end of namespace Ice |
| OLD | NEW |