OLD | NEW |
1 ; Tests various aspects of x86 opcode encodings. E.g., some opcodes like | 1 ; Tests various aspects of x86 opcode encodings. E.g., some opcodes like |
2 ; those for pmull vary more wildly depending on operand size (rather than | 2 ; those for pmull vary more wildly depending on operand size (rather than |
3 ; follow a usual pattern). | 3 ; follow a usual pattern). |
4 | 4 |
5 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 -mattr=sse4.1 \ | 5 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 -mattr=sse4.1 \ |
6 ; RUN: -sandbox | FileCheck %s | 6 ; RUN: -sandbox | FileCheck %s |
7 | 7 |
8 define <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) { | 8 define internal <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) { |
9 entry: | 9 entry: |
10 %res = mul <8 x i16> %arg0, %arg1 | 10 %res = mul <8 x i16> %arg0, %arg1 |
11 ret <8 x i16> %res | 11 ret <8 x i16> %res |
12 ; CHECK-LABEL: test_mul_v8i16 | 12 ; CHECK-LABEL: test_mul_v8i16 |
13 ; CHECK: 66 0f d5 c1 pmullw xmm0,xmm1 | 13 ; CHECK: 66 0f d5 c1 pmullw xmm0,xmm1 |
14 } | 14 } |
15 | 15 |
16 ; Test register and address mode encoding. | 16 ; Test register and address mode encoding. |
17 define <8 x i16> @test_mul_v8i16_more_regs(<8 x i1> %cond, <8 x i16> %arg0, <8 x
i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3, <8 x i16> %arg4, <8 x i16> %arg5,
<8 x i16> %arg6, <8 x i16> %arg7, <8 x i16> %arg8) { | 17 define internal <8 x i16> @test_mul_v8i16_more_regs( |
| 18 <8 x i1> %cond, <8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2, |
| 19 <8 x i16> %arg3, <8 x i16> %arg4, <8 x i16> %arg5, <8 x i16> %arg6, |
| 20 <8 x i16> %arg7, <8 x i16> %arg8) { |
18 entry: | 21 entry: |
19 %res1 = sub <8 x i16> %arg0, %arg1 | 22 %res1 = sub <8 x i16> %arg0, %arg1 |
20 %res2 = sub <8 x i16> %arg0, %arg2 | 23 %res2 = sub <8 x i16> %arg0, %arg2 |
21 %res3 = sub <8 x i16> %arg0, %arg3 | 24 %res3 = sub <8 x i16> %arg0, %arg3 |
22 %res4 = sub <8 x i16> %arg0, %arg4 | 25 %res4 = sub <8 x i16> %arg0, %arg4 |
23 %res5 = sub <8 x i16> %arg0, %arg5 | 26 %res5 = sub <8 x i16> %arg0, %arg5 |
24 %res6 = sub <8 x i16> %arg0, %arg6 | 27 %res6 = sub <8 x i16> %arg0, %arg6 |
25 %res7 = sub <8 x i16> %arg0, %arg7 | 28 %res7 = sub <8 x i16> %arg0, %arg7 |
26 %res8 = sub <8 x i16> %arg0, %arg8 | 29 %res8 = sub <8 x i16> %arg0, %arg8 |
27 %res_acc1 = select <8 x i1> %cond, <8 x i16> %res1, <8 x i16> %res2 | 30 %res_acc1 = select <8 x i1> %cond, <8 x i16> %res1, <8 x i16> %res2 |
28 %res_acc2 = select <8 x i1> %cond, <8 x i16> %res3, <8 x i16> %res4 | 31 %res_acc2 = select <8 x i1> %cond, <8 x i16> %res3, <8 x i16> %res4 |
29 %res_acc3 = select <8 x i1> %cond, <8 x i16> %res5, <8 x i16> %res6 | 32 %res_acc3 = select <8 x i1> %cond, <8 x i16> %res5, <8 x i16> %res6 |
30 %res_acc4 = select <8 x i1> %cond, <8 x i16> %res7, <8 x i16> %res8 | 33 %res_acc4 = select <8 x i1> %cond, <8 x i16> %res7, <8 x i16> %res8 |
31 %res_acc1_3 = select <8 x i1> %cond, <8 x i16> %res_acc1, <8 x i16> %res_acc3 | 34 %res_acc1_3 = select <8 x i1> %cond, <8 x i16> %res_acc1, <8 x i16> %res_acc3 |
32 %res_acc2_4 = select <8 x i1> %cond, <8 x i16> %res_acc2, <8 x i16> %res_acc4 | 35 %res_acc2_4 = select <8 x i1> %cond, <8 x i16> %res_acc2, <8 x i16> %res_acc4 |
33 %res = select <8 x i1> %cond, <8 x i16> %res_acc1_3, <8 x i16> %res_acc2_4 | 36 %res = select <8 x i1> %cond, <8 x i16> %res_acc1_3, <8 x i16> %res_acc2_4 |
34 ret <8 x i16> %res | 37 ret <8 x i16> %res |
35 ; CHECK-LABEL: test_mul_v8i16_more_regs | 38 ; CHECK-LABEL: test_mul_v8i16_more_regs |
36 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 39 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
37 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 40 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
38 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 41 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
39 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 42 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
40 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 43 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
41 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 44 ; CHECK-DAG: psubw xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
42 ; CHECK-DAG: psubw xmm0,XMMWORD PTR [esp | 45 ; CHECK-DAG: psubw xmm0,XMMWORD PTR [esp |
43 ; CHECK-DAG: psubw xmm1,XMMWORD PTR [esp | 46 ; CHECK-DAG: psubw xmm1,XMMWORD PTR [esp |
44 } | 47 } |
45 | 48 |
46 define <4 x i32> @test_mul_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) { | 49 define internal <4 x i32> @test_mul_v4i32(<4 x i32> %arg0, <4 x i32> %arg1) { |
47 entry: | 50 entry: |
48 %res = mul <4 x i32> %arg0, %arg1 | 51 %res = mul <4 x i32> %arg0, %arg1 |
49 ret <4 x i32> %res | 52 ret <4 x i32> %res |
50 ; CHECK-LABEL: test_mul_v4i32 | 53 ; CHECK-LABEL: test_mul_v4i32 |
51 ; CHECK: 66 0f 38 40 c1 pmulld xmm0,xmm1 | 54 ; CHECK: 66 0f 38 40 c1 pmulld xmm0,xmm1 |
52 } | 55 } |
53 | 56 |
54 define <4 x i32> @test_mul_v4i32_more_regs(<4 x i1> %cond, <4 x i32> %arg0, <4 x
i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, <4 x i32> %arg4, <4 x i32> %arg5,
<4 x i32> %arg6, <4 x i32> %arg7, <4 x i32> %arg8) { | 57 define internal <4 x i32> @test_mul_v4i32_more_regs( |
| 58 <4 x i1> %cond, <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, |
| 59 <4 x i32> %arg3, <4 x i32> %arg4, <4 x i32> %arg5, <4 x i32> %arg6, |
| 60 <4 x i32> %arg7, <4 x i32> %arg8) { |
55 entry: | 61 entry: |
56 %res1 = sub <4 x i32> %arg0, %arg1 | 62 %res1 = sub <4 x i32> %arg0, %arg1 |
57 %res2 = sub <4 x i32> %arg0, %arg2 | 63 %res2 = sub <4 x i32> %arg0, %arg2 |
58 %res3 = sub <4 x i32> %arg0, %arg3 | 64 %res3 = sub <4 x i32> %arg0, %arg3 |
59 %res4 = sub <4 x i32> %arg0, %arg4 | 65 %res4 = sub <4 x i32> %arg0, %arg4 |
60 %res5 = sub <4 x i32> %arg0, %arg5 | 66 %res5 = sub <4 x i32> %arg0, %arg5 |
61 %res6 = sub <4 x i32> %arg0, %arg6 | 67 %res6 = sub <4 x i32> %arg0, %arg6 |
62 %res7 = sub <4 x i32> %arg0, %arg7 | 68 %res7 = sub <4 x i32> %arg0, %arg7 |
63 %res8 = sub <4 x i32> %arg0, %arg8 | 69 %res8 = sub <4 x i32> %arg0, %arg8 |
64 %res_acc1 = select <4 x i1> %cond, <4 x i32> %res1, <4 x i32> %res2 | 70 %res_acc1 = select <4 x i1> %cond, <4 x i32> %res1, <4 x i32> %res2 |
(...skipping 11 matching lines...) Expand all Loading... |
76 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 82 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
77 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 83 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
78 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} | 84 ; CHECK-DAG: psubd xmm0,{{xmm[0-7]|xmmword ptr\[esp}} |
79 ; CHECK-DAG: psubd xmm0,XMMWORD PTR [esp | 85 ; CHECK-DAG: psubd xmm0,XMMWORD PTR [esp |
80 ; CHECK-DAG: psubd xmm1,XMMWORD PTR [esp | 86 ; CHECK-DAG: psubd xmm1,XMMWORD PTR [esp |
81 } | 87 } |
82 | 88 |
83 ; Test movq, which is used by atomic stores. | 89 ; Test movq, which is used by atomic stores. |
84 declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) | 90 declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) |
85 | 91 |
86 define void @test_atomic_store_64(i32 %iptr, i32 %iptr2, i32 %iptr3, i64 %v) { | 92 define internal void @test_atomic_store_64(i32 %iptr, i32 %iptr2, |
| 93 i32 %iptr3, i64 %v) { |
87 entry: | 94 entry: |
88 %ptr = inttoptr i32 %iptr to i64* | 95 %ptr = inttoptr i32 %iptr to i64* |
89 %ptr2 = inttoptr i32 %iptr2 to i64* | 96 %ptr2 = inttoptr i32 %iptr2 to i64* |
90 %ptr3 = inttoptr i32 %iptr3 to i64* | 97 %ptr3 = inttoptr i32 %iptr3 to i64* |
91 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr2, i32 6) | 98 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr2, i32 6) |
92 call void @llvm.nacl.atomic.store.i64(i64 1234567891024, i64* %ptr, i32 6) | 99 call void @llvm.nacl.atomic.store.i64(i64 1234567891024, i64* %ptr, i32 6) |
93 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr3, i32 6) | 100 call void @llvm.nacl.atomic.store.i64(i64 %v, i64* %ptr3, i32 6) |
94 ret void | 101 ret void |
95 } | 102 } |
96 ; CHECK-LABEL: test_atomic_store_64 | 103 ; CHECK-LABEL: test_atomic_store_64 |
97 ; CHECK-DAG: f3 0f 7e 04 24 movq xmm0,QWORD PTR [esp] | 104 ; CHECK-DAG: f3 0f 7e 04 24 movq xmm0,QWORD PTR [esp] |
98 ; CHECK-DAG: f3 0f 7e 44 24 08 movq xmm0,QWORD PTR [esp | 105 ; CHECK-DAG: f3 0f 7e 44 24 08 movq xmm0,QWORD PTR [esp |
99 ; CHECK-DAG: 66 0f d6 0{{.*}} movq QWORD PTR [e{{.*}}],xmm0 | 106 ; CHECK-DAG: 66 0f d6 0{{.*}} movq QWORD PTR [e{{.*}}],xmm0 |
100 | 107 |
101 ; Test "movups" via vector stores and loads. | 108 ; Test "movups" via vector stores and loads. |
102 define void @store_v16xI8(i32 %addr, i32 %addr2, i32 %addr3, <16 x i8> %v) { | 109 define internal void @store_v16xI8(i32 %addr, i32 %addr2, i32 %addr3, |
| 110 <16 x i8> %v) { |
103 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* | 111 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* |
104 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* | 112 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* |
105 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* | 113 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* |
106 store <16 x i8> %v, <16 x i8>* %addr2_v16xI8, align 1 | 114 store <16 x i8> %v, <16 x i8>* %addr2_v16xI8, align 1 |
107 store <16 x i8> %v, <16 x i8>* %addr_v16xI8, align 1 | 115 store <16 x i8> %v, <16 x i8>* %addr_v16xI8, align 1 |
108 store <16 x i8> %v, <16 x i8>* %addr3_v16xI8, align 1 | 116 store <16 x i8> %v, <16 x i8>* %addr3_v16xI8, align 1 |
109 ret void | 117 ret void |
110 } | 118 } |
111 ; CHECK-LABEL: store_v16xI8 | 119 ; CHECK-LABEL: store_v16xI8 |
112 ; CHECK: 0f 11 0{{.*}} movups XMMWORD PTR [e{{.*}}],xmm0 | 120 ; CHECK: 0f 11 0{{.*}} movups XMMWORD PTR [e{{.*}}],xmm0 |
113 | 121 |
114 define <16 x i8> @load_v16xI8(i32 %addr, i32 %addr2, i32 %addr3) { | 122 define internal <16 x i8> @load_v16xI8(i32 %addr, i32 %addr2, i32 %addr3) { |
115 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* | 123 %addr_v16xI8 = inttoptr i32 %addr to <16 x i8>* |
116 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* | 124 %addr2_v16xI8 = inttoptr i32 %addr2 to <16 x i8>* |
117 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* | 125 %addr3_v16xI8 = inttoptr i32 %addr3 to <16 x i8>* |
118 %res1 = load <16 x i8>, <16 x i8>* %addr2_v16xI8, align 1 | 126 %res1 = load <16 x i8>, <16 x i8>* %addr2_v16xI8, align 1 |
119 %res2 = load <16 x i8>, <16 x i8>* %addr_v16xI8, align 1 | 127 %res2 = load <16 x i8>, <16 x i8>* %addr_v16xI8, align 1 |
120 %res3 = load <16 x i8>, <16 x i8>* %addr3_v16xI8, align 1 | 128 %res3 = load <16 x i8>, <16 x i8>* %addr3_v16xI8, align 1 |
121 %res12 = add <16 x i8> %res1, %res2 | 129 %res12 = add <16 x i8> %res1, %res2 |
122 %res123 = add <16 x i8> %res12, %res3 | 130 %res123 = add <16 x i8> %res12, %res3 |
123 ret <16 x i8> %res123 | 131 ret <16 x i8> %res123 |
124 } | 132 } |
125 ; CHECK-LABEL: load_v16xI8 | 133 ; CHECK-LABEL: load_v16xI8 |
126 ; CHECK: 0f 10 0{{.*}} movups xmm0,XMMWORD PTR [e{{.*}}] | 134 ; CHECK: 0f 10 0{{.*}} movups xmm0,XMMWORD PTR [e{{.*}}] |
127 | 135 |
128 ; Test segment override prefix. This happens with nacl.read.tp. | 136 ; Test segment override prefix. This happens with nacl.read.tp. |
129 declare i8* @llvm.nacl.read.tp() | 137 declare i8* @llvm.nacl.read.tp() |
130 | 138 |
131 ; Also test more complex address operands via address-mode optimization. | 139 ; Also test more complex address operands via address-mode optimization. |
132 define i32 @test_nacl_read_tp_more_addressing() { | 140 define internal i32 @test_nacl_read_tp_more_addressing() { |
133 entry: | 141 entry: |
134 %ptr = call i8* @llvm.nacl.read.tp() | 142 %ptr = call i8* @llvm.nacl.read.tp() |
135 %__1 = ptrtoint i8* %ptr to i32 | 143 %__1 = ptrtoint i8* %ptr to i32 |
136 %x = add i32 %__1, %__1 | 144 %x = add i32 %__1, %__1 |
137 %__3 = inttoptr i32 %x to i32* | 145 %__3 = inttoptr i32 %x to i32* |
138 %v = load i32, i32* %__3, align 1 | 146 %v = load i32, i32* %__3, align 1 |
139 %v_add = add i32 %v, 1 | 147 %v_add = add i32 %v, 1 |
140 | 148 |
141 %ptr2 = call i8* @llvm.nacl.read.tp() | 149 %ptr2 = call i8* @llvm.nacl.read.tp() |
142 %__6 = ptrtoint i8* %ptr2 to i32 | 150 %__6 = ptrtoint i8* %ptr2 to i32 |
(...skipping 12 matching lines...) Expand all Loading... |
155 ; CHECK-LABEL: test_nacl_read_tp_more_addressing | 163 ; CHECK-LABEL: test_nacl_read_tp_more_addressing |
156 ; CHECK: 65 8b 05 00 00 00 00 mov eax,DWORD PTR gs:0x0 | 164 ; CHECK: 65 8b 05 00 00 00 00 mov eax,DWORD PTR gs:0x0 |
157 ; CHECK: 8b 04 00 mov eax,DWORD PTR [eax+eax*1] | 165 ; CHECK: 8b 04 00 mov eax,DWORD PTR [eax+eax*1] |
158 ; CHECK: 65 8b 0d 00 00 00 00 mov ecx,DWORD PTR gs:0x0 | 166 ; CHECK: 65 8b 0d 00 00 00 00 mov ecx,DWORD PTR gs:0x0 |
159 ; CHECK: 89 51 80 mov DWORD PTR [ecx-0x80],edx | 167 ; CHECK: 89 51 80 mov DWORD PTR [ecx-0x80],edx |
160 ; CHECK: 89 91 00 01 00 00 mov DWORD PTR [ecx+0x100],edx | 168 ; CHECK: 89 91 00 01 00 00 mov DWORD PTR [ecx+0x100],edx |
161 | 169 |
162 ; The 16-bit pinsrw/pextrw (SSE2) are quite different from | 170 ; The 16-bit pinsrw/pextrw (SSE2) are quite different from |
163 ; the pinsr{b,d}/pextr{b,d} (SSE4.1). | 171 ; the pinsr{b,d}/pextr{b,d} (SSE4.1). |
164 | 172 |
165 define <4 x i32> @test_pinsrd(<4 x i32> %vec, i32 %elt1, i32 %elt2, i32 %elt3, i
32 %elt4) { | 173 define internal <4 x i32> @test_pinsrd(<4 x i32> %vec, i32 %elt1, i32 %elt2, |
| 174 i32 %elt3, i32 %elt4) { |
166 entry: | 175 entry: |
167 %elt12 = add i32 %elt1, %elt2 | 176 %elt12 = add i32 %elt1, %elt2 |
168 %elt34 = add i32 %elt3, %elt4 | 177 %elt34 = add i32 %elt3, %elt4 |
169 %res1 = insertelement <4 x i32> %vec, i32 %elt12, i32 1 | 178 %res1 = insertelement <4 x i32> %vec, i32 %elt12, i32 1 |
170 %res2 = insertelement <4 x i32> %res1, i32 %elt34, i32 2 | 179 %res2 = insertelement <4 x i32> %res1, i32 %elt34, i32 2 |
171 %res3 = insertelement <4 x i32> %res2, i32 %elt1, i32 3 | 180 %res3 = insertelement <4 x i32> %res2, i32 %elt1, i32 3 |
172 ret <4 x i32> %res3 | 181 ret <4 x i32> %res3 |
173 } | 182 } |
174 ; CHECK-LABEL: test_pinsrd | 183 ; CHECK-LABEL: test_pinsrd |
175 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 01 pinsrd xmm0,e{{.*}} | 184 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 01 pinsrd xmm0,e{{.*}} |
176 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 02 pinsrd xmm0,e{{.*}} | 185 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 02 pinsrd xmm0,e{{.*}} |
177 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 03 pinsrd xmm0,e{{.*}} | 186 ; CHECK-DAG: 66 0f 3a 22 c{{.*}} 03 pinsrd xmm0,e{{.*}} |
178 | 187 |
179 define <16 x i8> @test_pinsrb(<16 x i8> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt
3_w, i32 %elt4_w) { | 188 define internal <16 x i8> @test_pinsrb(<16 x i8> %vec, i32 %elt1_w, i32 %elt2_w, |
| 189 i32 %elt3_w, i32 %elt4_w) { |
180 entry: | 190 entry: |
181 %elt1 = trunc i32 %elt1_w to i8 | 191 %elt1 = trunc i32 %elt1_w to i8 |
182 %elt2 = trunc i32 %elt2_w to i8 | 192 %elt2 = trunc i32 %elt2_w to i8 |
183 %elt3 = trunc i32 %elt3_w to i8 | 193 %elt3 = trunc i32 %elt3_w to i8 |
184 %elt4 = trunc i32 %elt4_w to i8 | 194 %elt4 = trunc i32 %elt4_w to i8 |
185 %elt12 = add i8 %elt1, %elt2 | 195 %elt12 = add i8 %elt1, %elt2 |
186 %elt34 = add i8 %elt3, %elt4 | 196 %elt34 = add i8 %elt3, %elt4 |
187 %res1 = insertelement <16 x i8> %vec, i8 %elt12, i32 1 | 197 %res1 = insertelement <16 x i8> %vec, i8 %elt12, i32 1 |
188 %res2 = insertelement <16 x i8> %res1, i8 %elt34, i32 7 | 198 %res2 = insertelement <16 x i8> %res1, i8 %elt34, i32 7 |
189 %res3 = insertelement <16 x i8> %res2, i8 %elt1, i32 15 | 199 %res3 = insertelement <16 x i8> %res2, i8 %elt1, i32 15 |
190 ret <16 x i8> %res3 | 200 ret <16 x i8> %res3 |
191 } | 201 } |
192 ; CHECK-LABEL: test_pinsrb | 202 ; CHECK-LABEL: test_pinsrb |
193 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 01 pinsrb xmm0,e{{.*}} | 203 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 01 pinsrb xmm0,e{{.*}} |
194 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 07 pinsrb xmm0,e{{.*}} | 204 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 07 pinsrb xmm0,e{{.*}} |
195 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 0f pinsrb xmm0,e{{.*}} | 205 ; CHECK-DAG: 66 0f 3a 20 c{{.*}} 0f pinsrb xmm0,e{{.*}} |
196 | 206 |
197 define <8 x i16> @test_pinsrw(<8 x i16> %vec, i32 %elt1_w, i32 %elt2_w, i32 %elt
3_w, i32 %elt4_w) { | 207 define internal <8 x i16> @test_pinsrw(<8 x i16> %vec, i32 %elt1_w, i32 %elt2_w, |
| 208 i32 %elt3_w, i32 %elt4_w) { |
198 entry: | 209 entry: |
199 %elt1 = trunc i32 %elt1_w to i16 | 210 %elt1 = trunc i32 %elt1_w to i16 |
200 %elt2 = trunc i32 %elt2_w to i16 | 211 %elt2 = trunc i32 %elt2_w to i16 |
201 %elt3 = trunc i32 %elt3_w to i16 | 212 %elt3 = trunc i32 %elt3_w to i16 |
202 %elt4 = trunc i32 %elt4_w to i16 | 213 %elt4 = trunc i32 %elt4_w to i16 |
203 %elt12 = add i16 %elt1, %elt2 | 214 %elt12 = add i16 %elt1, %elt2 |
204 %elt34 = add i16 %elt3, %elt4 | 215 %elt34 = add i16 %elt3, %elt4 |
205 %res1 = insertelement <8 x i16> %vec, i16 %elt12, i32 1 | 216 %res1 = insertelement <8 x i16> %vec, i16 %elt12, i32 1 |
206 %res2 = insertelement <8 x i16> %res1, i16 %elt34, i32 4 | 217 %res2 = insertelement <8 x i16> %res1, i16 %elt34, i32 4 |
207 %res3 = insertelement <8 x i16> %res2, i16 %elt1, i32 7 | 218 %res3 = insertelement <8 x i16> %res2, i16 %elt1, i32 7 |
208 ret <8 x i16> %res3 | 219 ret <8 x i16> %res3 |
209 } | 220 } |
210 ; CHECK-LABEL: test_pinsrw | 221 ; CHECK-LABEL: test_pinsrw |
211 ; CHECK-DAG: 66 0f c4 c{{.*}} 01 pinsrw xmm0,e{{.*}} | 222 ; CHECK-DAG: 66 0f c4 c{{.*}} 01 pinsrw xmm0,e{{.*}} |
212 ; CHECK-DAG: 66 0f c4 c{{.*}} 04 pinsrw xmm0,e{{.*}} | 223 ; CHECK-DAG: 66 0f c4 c{{.*}} 04 pinsrw xmm0,e{{.*}} |
213 ; CHECK-DAG: 66 0f c4 c{{.*}} 07 pinsrw xmm0,e{{.*}} | 224 ; CHECK-DAG: 66 0f c4 c{{.*}} 07 pinsrw xmm0,e{{.*}} |
214 | 225 |
215 define i32 @test_pextrd(i32 %c, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec
3, <4 x i32> %vec4) { | 226 define internal i32 @test_pextrd(i32 %c, <4 x i32> %vec1, <4 x i32> %vec2, |
| 227 <4 x i32> %vec3, <4 x i32> %vec4) { |
216 entry: | 228 entry: |
217 switch i32 %c, label %three [i32 0, label %zero | 229 switch i32 %c, label %three [i32 0, label %zero |
218 i32 1, label %one | 230 i32 1, label %one |
219 i32 2, label %two] | 231 i32 2, label %two] |
220 zero: | 232 zero: |
221 %res0 = extractelement <4 x i32> %vec1, i32 0 | 233 %res0 = extractelement <4 x i32> %vec1, i32 0 |
222 ret i32 %res0 | 234 ret i32 %res0 |
223 one: | 235 one: |
224 %res1 = extractelement <4 x i32> %vec2, i32 1 | 236 %res1 = extractelement <4 x i32> %vec2, i32 1 |
225 ret i32 %res1 | 237 ret i32 %res1 |
226 two: | 238 two: |
227 %res2 = extractelement <4 x i32> %vec3, i32 2 | 239 %res2 = extractelement <4 x i32> %vec3, i32 2 |
228 ret i32 %res2 | 240 ret i32 %res2 |
229 three: | 241 three: |
230 %res3 = extractelement <4 x i32> %vec4, i32 3 | 242 %res3 = extractelement <4 x i32> %vec4, i32 3 |
231 ret i32 %res3 | 243 ret i32 %res3 |
232 } | 244 } |
233 ; CHECK-LABEL: test_pextrd | 245 ; CHECK-LABEL: test_pextrd |
234 ; CHECK-DAG: 66 0f 3a 16 c0 00 pextrd eax,xmm0 | 246 ; CHECK-DAG: 66 0f 3a 16 c0 00 pextrd eax,xmm0 |
235 ; CHECK-DAG: 66 0f 3a 16 c8 01 pextrd eax,xmm1 | 247 ; CHECK-DAG: 66 0f 3a 16 c8 01 pextrd eax,xmm1 |
236 ; CHECK-DAG: 66 0f 3a 16 d0 02 pextrd eax,xmm2 | 248 ; CHECK-DAG: 66 0f 3a 16 d0 02 pextrd eax,xmm2 |
237 ; CHECK-DAG: 66 0f 3a 16 d8 03 pextrd eax,xmm3 | 249 ; CHECK-DAG: 66 0f 3a 16 d8 03 pextrd eax,xmm3 |
238 | 250 |
239 define i32 @test_pextrb(i32 %c, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec
3, <16 x i8> %vec4) { | 251 define internal i32 @test_pextrb(i32 %c, <16 x i8> %vec1, <16 x i8> %vec2, |
| 252 <16 x i8> %vec3, <16 x i8> %vec4) { |
240 entry: | 253 entry: |
241 switch i32 %c, label %three [i32 0, label %zero | 254 switch i32 %c, label %three [i32 0, label %zero |
242 i32 1, label %one | 255 i32 1, label %one |
243 i32 2, label %two] | 256 i32 2, label %two] |
244 zero: | 257 zero: |
245 %res0 = extractelement <16 x i8> %vec1, i32 0 | 258 %res0 = extractelement <16 x i8> %vec1, i32 0 |
246 %res0_ext = zext i8 %res0 to i32 | 259 %res0_ext = zext i8 %res0 to i32 |
247 ret i32 %res0_ext | 260 ret i32 %res0_ext |
248 one: | 261 one: |
249 %res1 = extractelement <16 x i8> %vec2, i32 6 | 262 %res1 = extractelement <16 x i8> %vec2, i32 6 |
250 %res1_ext = zext i8 %res1 to i32 | 263 %res1_ext = zext i8 %res1 to i32 |
251 ret i32 %res1_ext | 264 ret i32 %res1_ext |
252 two: | 265 two: |
253 %res2 = extractelement <16 x i8> %vec3, i32 12 | 266 %res2 = extractelement <16 x i8> %vec3, i32 12 |
254 %res2_ext = zext i8 %res2 to i32 | 267 %res2_ext = zext i8 %res2 to i32 |
255 ret i32 %res2_ext | 268 ret i32 %res2_ext |
256 three: | 269 three: |
257 %res3 = extractelement <16 x i8> %vec4, i32 15 | 270 %res3 = extractelement <16 x i8> %vec4, i32 15 |
258 %res3_ext = zext i8 %res3 to i32 | 271 %res3_ext = zext i8 %res3 to i32 |
259 ret i32 %res3_ext | 272 ret i32 %res3_ext |
260 } | 273 } |
261 ; CHECK-LABEL: test_pextrb | 274 ; CHECK-LABEL: test_pextrb |
262 ; CHECK-DAG: 66 0f 3a 14 c0 00 pextrb eax,xmm0 | 275 ; CHECK-DAG: 66 0f 3a 14 c0 00 pextrb eax,xmm0 |
263 ; CHECK-DAG: 66 0f 3a 14 c8 06 pextrb eax,xmm1 | 276 ; CHECK-DAG: 66 0f 3a 14 c8 06 pextrb eax,xmm1 |
264 ; CHECK-DAG: 66 0f 3a 14 d0 0c pextrb eax,xmm2 | 277 ; CHECK-DAG: 66 0f 3a 14 d0 0c pextrb eax,xmm2 |
265 ; CHECK-DAG: 66 0f 3a 14 d8 0f pextrb eax,xmm3 | 278 ; CHECK-DAG: 66 0f 3a 14 d8 0f pextrb eax,xmm3 |
266 | 279 |
267 define i32 @test_pextrw(i32 %c, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec
3, <8 x i16> %vec4) { | 280 define internal i32 @test_pextrw(i32 %c, <8 x i16> %vec1, <8 x i16> %vec2, |
| 281 <8 x i16> %vec3, <8 x i16> %vec4) { |
268 entry: | 282 entry: |
269 switch i32 %c, label %three [i32 0, label %zero | 283 switch i32 %c, label %three [i32 0, label %zero |
270 i32 1, label %one | 284 i32 1, label %one |
271 i32 2, label %two] | 285 i32 2, label %two] |
272 zero: | 286 zero: |
273 %res0 = extractelement <8 x i16> %vec1, i32 0 | 287 %res0 = extractelement <8 x i16> %vec1, i32 0 |
274 %res0_ext = zext i16 %res0 to i32 | 288 %res0_ext = zext i16 %res0 to i32 |
275 ret i32 %res0_ext | 289 ret i32 %res0_ext |
276 one: | 290 one: |
277 %res1 = extractelement <8 x i16> %vec2, i32 2 | 291 %res1 = extractelement <8 x i16> %vec2, i32 2 |
278 %res1_ext = zext i16 %res1 to i32 | 292 %res1_ext = zext i16 %res1 to i32 |
279 ret i32 %res1_ext | 293 ret i32 %res1_ext |
280 two: | 294 two: |
281 %res2 = extractelement <8 x i16> %vec3, i32 5 | 295 %res2 = extractelement <8 x i16> %vec3, i32 5 |
282 %res2_ext = zext i16 %res2 to i32 | 296 %res2_ext = zext i16 %res2 to i32 |
283 ret i32 %res2_ext | 297 ret i32 %res2_ext |
284 three: | 298 three: |
285 %res3 = extractelement <8 x i16> %vec4, i32 7 | 299 %res3 = extractelement <8 x i16> %vec4, i32 7 |
286 %res3_ext = zext i16 %res3 to i32 | 300 %res3_ext = zext i16 %res3 to i32 |
287 ret i32 %res3_ext | 301 ret i32 %res3_ext |
288 } | 302 } |
289 ; CHECK-LABEL: test_pextrw | 303 ; CHECK-LABEL: test_pextrw |
290 ; CHECK-DAG: 66 0f c5 c0 00 pextrw eax,xmm0 | 304 ; CHECK-DAG: 66 0f c5 c0 00 pextrw eax,xmm0 |
291 ; CHECK-DAG: 66 0f c5 c1 02 pextrw eax,xmm1 | 305 ; CHECK-DAG: 66 0f c5 c1 02 pextrw eax,xmm1 |
292 ; CHECK-DAG: 66 0f c5 c2 05 pextrw eax,xmm2 | 306 ; CHECK-DAG: 66 0f c5 c2 05 pextrw eax,xmm2 |
293 ; CHECK-DAG: 66 0f c5 c3 07 pextrw eax,xmm3 | 307 ; CHECK-DAG: 66 0f c5 c3 07 pextrw eax,xmm3 |
OLD | NEW |