OLD | NEW |
1 ; This checks support for insertelement and extractelement. | 1 ; This checks support for insertelement and extractelement. |
2 | 2 |
3 ; RUN: %llvm2ice --verbose inst %s | FileCheck %s | 3 ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s |
| 4 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s |
| 5 ; RUN: %llvm2ice -O2 -mattr=sse4.1 --verbose none %s \ |
| 6 ; RUN: | FileCheck %s --check-prefix=SSE41 |
| 7 ; RUN: %llvm2ice -Om1 -mattr=sse4.1 --verbose none %s \ |
| 8 ; RUN: | FileCheck %s --check-prefix=SSE41 |
4 ; RUN: %llvm2ice -O2 --verbose none %s \ | 9 ; RUN: %llvm2ice -O2 --verbose none %s \ |
5 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj | 10 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj |
6 ; RUN: %llvm2ice -Om1 --verbose none %s \ | 11 ; RUN: %llvm2ice -Om1 --verbose none %s \ |
7 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj | 12 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj |
| 13 ; RUN: %llvm2ice -O2 -mattr=sse4.1 --verbose none %s \ |
| 14 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj |
| 15 ; RUN: %llvm2ice -Om1 -mattr=sse4.1 --verbose none %s \ |
| 16 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj |
8 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s | 17 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s |
9 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s | 18 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s |
10 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \ | 19 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \ |
11 ; RUN: | FileCheck --check-prefix=DUMP %s | 20 ; RUN: | FileCheck --check-prefix=DUMP %s |
12 | 21 |
13 ; insertelement operations | 22 ; insertelement operations |
14 | 23 |
; Inserts the float %elt at index 0 of the <4 x float> %vec and returns the vector.
; Expected lowering: the CHECK prefix wants a movss; the SSE41 prefix wants an
; insertps whose last operand is 0.
15 define <4 x float> @insertelement_v4f32_0(<4 x float> %vec, float %elt) { | 24 define <4 x float> @insertelement_v4f32_0(<4 x float> %vec, float %elt) { |
16 entry: | 25 entry: |
17 %res = insertelement <4 x float> %vec, float %elt, i32 0 | 26 %res = insertelement <4 x float> %vec, float %elt, i32 0 |
18 ret <4 x float> %res | 27 ret <4 x float> %res |
19 ; CHECK-LABEL: insertelement_v4f32_0: | 28 ; CHECK-LABEL: insertelement_v4f32_0: |
20 ; CHECK: movss | 29 ; CHECK: movss |
| 30 |
| 31 ; SSE41-LABEL: insertelement_v4f32_0: |
| 32 ; SSE41: insertps {{.*}}, {{.*}}, 0 |
21 } | 33 } |
22 | 34 |
; Inserts the i32 %elt at index 0 of the <4 x i32> %vec and returns the vector.
; Expected lowering: the CHECK prefix wants a movss; the SSE41 prefix wants a
; pinsrd whose last operand is 0.
23 define <4 x i32> @insertelement_v4i32_0(<4 x i32> %vec, i32 %elt) { | 35 define <4 x i32> @insertelement_v4i32_0(<4 x i32> %vec, i32 %elt) { |
24 entry: | 36 entry: |
25 %res = insertelement <4 x i32> %vec, i32 %elt, i32 0 | 37 %res = insertelement <4 x i32> %vec, i32 %elt, i32 0 |
26 ret <4 x i32> %res | 38 ret <4 x i32> %res |
27 ; CHECK-LABEL: insertelement_v4i32_0: | 39 ; CHECK-LABEL: insertelement_v4i32_0: |
28 ; CHECK: movss | 40 ; CHECK: movss |
| 41 |
| 42 ; SSE41-LABEL: insertelement_v4i32_0: |
| 43 ; SSE41: pinsrd {{.*}}, {{.*}}, 0 |
29 } | 44 } |
30 | 45 |
31 | 46 |
; Inserts the float %elt at index 1 of the <4 x float> %vec and returns the vector.
; Expected lowering: the CHECK prefix wants two shufps in sequence; the SSE41
; prefix wants an insertps whose last operand is 16.
; NOTE(review): 16 == 1 << 4, which is consistent with insertps encoding the
; destination lane in bits 5:4 of its immediate - confirm against the ISA reference.
32 define <4 x float> @insertelement_v4f32_1(<4 x float> %vec, float %elt) { | 47 define <4 x float> @insertelement_v4f32_1(<4 x float> %vec, float %elt) { |
33 entry: | 48 entry: |
34 %res = insertelement <4 x float> %vec, float %elt, i32 1 | 49 %res = insertelement <4 x float> %vec, float %elt, i32 1 |
35 ret <4 x float> %res | 50 ret <4 x float> %res |
36 ; CHECK-LABEL: insertelement_v4f32_1: | 51 ; CHECK-LABEL: insertelement_v4f32_1: |
37 ; CHECK: shufps | 52 ; CHECK: shufps |
38 ; CHECK: shufps | 53 ; CHECK: shufps |
| 54 |
| 55 ; SSE41-LABEL: insertelement_v4f32_1: |
| 56 ; SSE41: insertps {{.*}}, {{.*}}, 16 |
39 } | 57 } |
40 | 58 |
; Inserts the i32 %elt at index 1 of the <4 x i32> %vec and returns the vector.
; Expected lowering: the CHECK prefix wants two shufps in sequence; the SSE41
; prefix wants a pinsrd whose last operand is 1.
41 define <4 x i32> @insertelement_v4i32_1(<4 x i32> %vec, i32 %elt) { | 59 define <4 x i32> @insertelement_v4i32_1(<4 x i32> %vec, i32 %elt) { |
42 entry: | 60 entry: |
43 %res = insertelement <4 x i32> %vec, i32 %elt, i32 1 | 61 %res = insertelement <4 x i32> %vec, i32 %elt, i32 1 |
44 ret <4 x i32> %res | 62 ret <4 x i32> %res |
45 ; CHECK-LABEL: insertelement_v4i32_1: | 63 ; CHECK-LABEL: insertelement_v4i32_1: |
46 ; CHECK: shufps | 64 ; CHECK: shufps |
47 ; CHECK: shufps | 65 ; CHECK: shufps |
| 66 |
| 67 ; SSE41-LABEL: insertelement_v4i32_1: |
| 68 ; SSE41: pinsrd {{.*}}, {{.*}}, 1 |
48 } | 69 } |
49 | 70 |
; Truncates the i32 argument %elt.arg to i16, inserts it at index 1 of the
; <8 x i16> %vec, and returns the vector.
; Expected lowering: both the CHECK and SSE41 prefixes want a pinsrw.
50 define <8 x i16> @insertelement_v8i16(<8 x i16> %vec, i32 %elt.arg) { | 71 define <8 x i16> @insertelement_v8i16(<8 x i16> %vec, i32 %elt.arg) { |
51 entry: | 72 entry: |
52 %elt = trunc i32 %elt.arg to i16 | 73 %elt = trunc i32 %elt.arg to i16 |
53 %res = insertelement <8 x i16> %vec, i16 %elt, i32 1 | 74 %res = insertelement <8 x i16> %vec, i16 %elt, i32 1 |
54 ret <8 x i16> %res | 75 ret <8 x i16> %res |
55 ; CHECK-LABEL: insertelement_v8i16 | 76 ; CHECK-LABEL: insertelement_v8i16: |
56 ; CHECK: pinsrw | 77 ; CHECK: pinsrw |
| 78 |
| 79 ; SSE41-LABEL: insertelement_v8i16: |
| 80 ; SSE41: pinsrw |
57 } | 81 } |
58 | 82 |
; Truncates the i32 argument %elt.arg to i8, inserts it at index 1 of the
; <16 x i8> %vec, and returns the vector.
; Expected lowering: the CHECK prefix wants movups, then lea, then mov
; (presumably a store-modify-in-memory sequence - confirm against the lowering
; code); the SSE41 prefix wants a single pinsrb.
59 define <16 x i8> @insertelement_v16i8(<16 x i8> %vec, i32 %elt.arg) { | 83 define <16 x i8> @insertelement_v16i8(<16 x i8> %vec, i32 %elt.arg) { |
60 entry: | 84 entry: |
61 %elt = trunc i32 %elt.arg to i8 | 85 %elt = trunc i32 %elt.arg to i8 |
62 %res = insertelement <16 x i8> %vec, i8 %elt, i32 1 | 86 %res = insertelement <16 x i8> %vec, i8 %elt, i32 1 |
63 ret <16 x i8> %res | 87 ret <16 x i8> %res |
64 ; CHECK-LABEL: insertelement_v16i8: | 88 ; CHECK-LABEL: insertelement_v16i8: |
65 ; CHECK: movups | 89 ; CHECK: movups |
66 ; CHECK: lea | 90 ; CHECK: lea |
67 ; CHECK: mov | 91 ; CHECK: mov |
| 92 |
| 93 ; SSE41-LABEL: insertelement_v16i8: |
| 94 ; SSE41: pinsrb |
68 } | 95 } |
69 | 96 |
; Truncates the i32 argument %elt.arg to i1, inserts it at index 0 of the
; <4 x i1> %vec, and returns the vector.
; Expected lowering matches the <4 x i32> index-0 case in this file: the CHECK
; prefix wants a movss; the SSE41 prefix wants a pinsrd whose last operand is 0.
70 define <4 x i1> @insertelement_v4i1_0(<4 x i1> %vec, i32 %elt.arg) { | 97 define <4 x i1> @insertelement_v4i1_0(<4 x i1> %vec, i32 %elt.arg) { |
71 entry: | 98 entry: |
72 %elt = trunc i32 %elt.arg to i1 | 99 %elt = trunc i32 %elt.arg to i1 |
73 %res = insertelement <4 x i1> %vec, i1 %elt, i32 0 | 100 %res = insertelement <4 x i1> %vec, i1 %elt, i32 0 |
74 ret <4 x i1> %res | 101 ret <4 x i1> %res |
75 ; CHECK-LABEL: insertelement_v4i1_0: | 102 ; CHECK-LABEL: insertelement_v4i1_0: |
76 ; CHECK: movss | 103 ; CHECK: movss |
| 104 |
| 105 ; SSE41-LABEL: insertelement_v4i1_0: |
| 106 ; SSE41: pinsrd {{.*}}, {{.*}}, 0 |
77 } | 107 } |
78 | 108 |
; Truncates the i32 argument %elt.arg to i1, inserts it at index 1 of the
; <4 x i1> %vec, and returns the vector.
; Expected lowering matches the <4 x i32> index-1 case in this file: the CHECK
; prefix wants two shufps in sequence; the SSE41 prefix wants a pinsrd whose
; last operand is 1.
79 define <4 x i1> @insertelement_v4i1_1(<4 x i1> %vec, i32 %elt.arg) { | 109 define <4 x i1> @insertelement_v4i1_1(<4 x i1> %vec, i32 %elt.arg) { |
80 entry: | 110 entry: |
81 %elt = trunc i32 %elt.arg to i1 | 111 %elt = trunc i32 %elt.arg to i1 |
82 %res = insertelement <4 x i1> %vec, i1 %elt, i32 1 | 112 %res = insertelement <4 x i1> %vec, i1 %elt, i32 1 |
83 ret <4 x i1> %res | 113 ret <4 x i1> %res |
84 ; CHECK-LABEL: insertelement_v4i1_1: | 114 ; CHECK-LABEL: insertelement_v4i1_1: |
85 ; CHECK: shufps | 115 ; CHECK: shufps |
86 ; CHECK: shufps | 116 ; CHECK: shufps |
| 117 |
| 118 ; SSE41-LABEL: insertelement_v4i1_1: |
| 119 ; SSE41: pinsrd {{.*}}, {{.*}}, 1 |
87 } | 120 } |
88 | 121 |
; Truncates the i32 argument %elt.arg to i1, inserts it at index 1 of the
; <8 x i1> %vec, and returns the vector.
; Expected lowering matches the <8 x i16> case in this file: both the CHECK and
; SSE41 prefixes want a pinsrw.
89 define <8 x i1> @insertelement_v8i1(<8 x i1> %vec, i32 %elt.arg) { | 122 define <8 x i1> @insertelement_v8i1(<8 x i1> %vec, i32 %elt.arg) { |
90 entry: | 123 entry: |
91 %elt = trunc i32 %elt.arg to i1 | 124 %elt = trunc i32 %elt.arg to i1 |
92 %res = insertelement <8 x i1> %vec, i1 %elt, i32 1 | 125 %res = insertelement <8 x i1> %vec, i1 %elt, i32 1 |
93 ret <8 x i1> %res | 126 ret <8 x i1> %res |
94 ; CHECK-LABEL: insertelement_v8i1: | 127 ; CHECK-LABEL: insertelement_v8i1: |
95 ; CHECK: pinsrw | 128 ; CHECK: pinsrw |
| 129 |
| 130 ; SSE41-LABEL: insertelement_v8i1: |
| 131 ; SSE41: pinsrw |
96 } | 132 } |
97 | 133 |
; Truncates the i32 argument %elt.arg to i1, inserts it at index 1 of the
; <16 x i1> %vec, and returns the vector.
; Expected lowering matches the <16 x i8> case in this file: the CHECK prefix
; wants movups, then lea, then mov; the SSE41 prefix wants a single pinsrb.
98 define <16 x i1> @insertelement_v16i1(<16 x i1> %vec, i32 %elt.arg) { | 134 define <16 x i1> @insertelement_v16i1(<16 x i1> %vec, i32 %elt.arg) { |
99 entry: | 135 entry: |
100 %elt = trunc i32 %elt.arg to i1 | 136 %elt = trunc i32 %elt.arg to i1 |
101 %res = insertelement <16 x i1> %vec, i1 %elt, i32 1 | 137 %res = insertelement <16 x i1> %vec, i1 %elt, i32 1 |
102 ret <16 x i1> %res | 138 ret <16 x i1> %res |
103 ; CHECK-LABEL: insertelement_v16i1: | 139 ; CHECK-LABEL: insertelement_v16i1: |
104 ; CHECK: movups | 140 ; CHECK: movups |
105 ; CHECK: lea | 141 ; CHECK: lea |
106 ; CHECK: mov | 142 ; CHECK: mov |
| 143 |
| 144 ; SSE41-LABEL: insertelement_v16i1: |
| 145 ; SSE41: pinsrb |
107 } | 146 } |
108 | 147 |
109 ; extractelement operations | 148 ; extractelement operations |
110 | 149 |
; Extracts the float at index 1 of the <4 x float> %vec and returns it.
; Expected lowering: both the CHECK and SSE41 prefixes want a pshufd, i.e. the
; test expects the same instruction with and without -mattr=sse4.1.
111 define float @extractelement_v4f32(<4 x float> %vec) { | 150 define float @extractelement_v4f32(<4 x float> %vec) { |
112 entry: | 151 entry: |
113 %res = extractelement <4 x float> %vec, i32 1 | 152 %res = extractelement <4 x float> %vec, i32 1 |
114 ret float %res | 153 ret float %res |
115 ; CHECK-LABEL: extractelement_v4f32: | 154 ; CHECK-LABEL: extractelement_v4f32: |
116 ; CHECK: pshufd | 155 ; CHECK: pshufd |
| 156 |
| 157 ; SSE41-LABEL: extractelement_v4f32: |
| 158 ; SSE41: pshufd |
117 } | 159 } |
118 | 160 |
; Extracts the i32 at index 1 of the <4 x i32> %vec and returns it.
; Expected lowering: the CHECK prefix wants a pshufd; the SSE41 prefix wants a
; pextrd.
119 define i32 @extractelement_v4i32(<4 x i32> %vec) { | 161 define i32 @extractelement_v4i32(<4 x i32> %vec) { |
120 entry: | 162 entry: |
121 %res = extractelement <4 x i32> %vec, i32 1 | 163 %res = extractelement <4 x i32> %vec, i32 1 |
122 ret i32 %res | 164 ret i32 %res |
123 ; CHECK-LABEL: extractelement_v4i32: | 165 ; CHECK-LABEL: extractelement_v4i32: |
124 ; CHECK: pshufd | 166 ; CHECK: pshufd |
| 167 |
| 168 ; SSE41-LABEL: extractelement_v4i32: |
| 169 ; SSE41: pextrd |
125 } | 170 } |
126 | 171 |
; Extracts the i16 at index 1 of the <8 x i16> %vec, zero-extends it to i32,
; and returns the extended value.
; Expected lowering: both the CHECK and SSE41 prefixes want a pextrw.
127 define i32 @extractelement_v8i16(<8 x i16> %vec) { | 172 define i32 @extractelement_v8i16(<8 x i16> %vec) { |
128 entry: | 173 entry: |
129 %res = extractelement <8 x i16> %vec, i32 1 | 174 %res = extractelement <8 x i16> %vec, i32 1 |
130 %res.ext = zext i16 %res to i32 | 175 %res.ext = zext i16 %res to i32 |
131 ret i32 %res.ext | 176 ret i32 %res.ext |
132 ; CHECK-LABEL: extractelement_v8i16: | 177 ; CHECK-LABEL: extractelement_v8i16: |
133 ; CHECK: pextrw | 178 ; CHECK: pextrw |
| 179 |
| 180 ; SSE41-LABEL: extractelement_v8i16: |
| 181 ; SSE41: pextrw |
134 } | 182 } |
135 | 183 |
; Extracts the i8 at index 1 of the <16 x i8> %vec, zero-extends it to i32,
; and returns the extended value.
; Expected lowering: the CHECK prefix wants movups, then lea, then mov
; (presumably a store-then-load-from-memory sequence - confirm against the
; lowering code); the SSE41 prefix wants a single pextrb.
136 define i32 @extractelement_v16i8(<16 x i8> %vec) { | 184 define i32 @extractelement_v16i8(<16 x i8> %vec) { |
137 entry: | 185 entry: |
138 %res = extractelement <16 x i8> %vec, i32 1 | 186 %res = extractelement <16 x i8> %vec, i32 1 |
139 %res.ext = zext i8 %res to i32 | 187 %res.ext = zext i8 %res to i32 |
140 ret i32 %res.ext | 188 ret i32 %res.ext |
141 ; CHECK-LABEL: extractelement_v16i8: | 189 ; CHECK-LABEL: extractelement_v16i8: |
142 ; CHECK: movups | 190 ; CHECK: movups |
143 ; CHECK: lea | 191 ; CHECK: lea |
144 ; CHECK: mov | 192 ; CHECK: mov |
| 193 |
| 194 ; SSE41-LABEL: extractelement_v16i8: |
| 195 ; SSE41: pextrb |
145 } | 196 } |
146 | 197 |
; Extracts the i1 at index 1 of the <4 x i1> %vec, zero-extends it to i32,
; and returns the extended value.
; Expected lowering matches the <4 x i32> extract case in this file: the CHECK
; prefix wants a pshufd; the SSE41 prefix wants a pextrd.
147 define i32 @extractelement_v4i1(<4 x i1> %vec) { | 198 define i32 @extractelement_v4i1(<4 x i1> %vec) { |
148 entry: | 199 entry: |
149 %res = extractelement <4 x i1> %vec, i32 1 | 200 %res = extractelement <4 x i1> %vec, i32 1 |
150 %res.ext = zext i1 %res to i32 | 201 %res.ext = zext i1 %res to i32 |
151 ret i32 %res.ext | 202 ret i32 %res.ext |
152 ; CHECK-LABEL: extractelement_v4i1: | 203 ; CHECK-LABEL: extractelement_v4i1: |
153 ; CHECK: pshufd | 204 ; CHECK: pshufd |
| 205 |
| 206 ; SSE41-LABEL: extractelement_v4i1: |
| 207 ; SSE41: pextrd |
154 } | 208 } |
155 | 209 |
; Extracts the i1 at index 1 of the <8 x i1> %vec, zero-extends it to i32,
; and returns the extended value.
; Expected lowering matches the <8 x i16> extract case in this file: both the
; CHECK and SSE41 prefixes want a pextrw.
156 define i32 @extractelement_v8i1(<8 x i1> %vec) { | 210 define i32 @extractelement_v8i1(<8 x i1> %vec) { |
157 entry: | 211 entry: |
158 %res = extractelement <8 x i1> %vec, i32 1 | 212 %res = extractelement <8 x i1> %vec, i32 1 |
159 %res.ext = zext i1 %res to i32 | 213 %res.ext = zext i1 %res to i32 |
160 ret i32 %res.ext | 214 ret i32 %res.ext |
161 ; CHECK-LABEL: extractelement_v8i1: | 215 ; CHECK-LABEL: extractelement_v8i1: |
162 ; CHECK: pextrw | 216 ; CHECK: pextrw |
| 217 |
| 218 ; SSE41-LABEL: extractelement_v8i1: |
| 219 ; SSE41: pextrw |
163 } | 220 } |
164 | 221 |
; Extracts the i1 at index 1 of the <16 x i1> %vec, zero-extends it to i32,
; and returns the extended value.
; Expected lowering matches the <16 x i8> extract case in this file: the CHECK
; prefix wants movups, then lea, then mov; the SSE41 prefix wants a single pextrb.
165 define i32 @extractelement_v16i1(<16 x i1> %vec) { | 222 define i32 @extractelement_v16i1(<16 x i1> %vec) { |
166 entry: | 223 entry: |
167 %res = extractelement <16 x i1> %vec, i32 1 | 224 %res = extractelement <16 x i1> %vec, i32 1 |
168 %res.ext = zext i1 %res to i32 | 225 %res.ext = zext i1 %res to i32 |
169 ret i32 %res.ext | 226 ret i32 %res.ext |
170 ; CHECK-LABEL: extractelement_v16i1: | 227 ; CHECK-LABEL: extractelement_v16i1: |
171 ; CHECK: movups | 228 ; CHECK: movups |
172 ; CHECK: lea | 229 ; CHECK: lea |
173 ; CHECK: mov | 230 ; CHECK: mov |
| 231 |
| 232 ; SSE41-LABEL: extractelement_v16i1: |
| 233 ; SSE41: pextrb |
174 } | 234 } |
175 | 235 |
176 ; ERRORS-NOT: ICE translation error | 236 ; ERRORS-NOT: ICE translation error |
177 ; DUMP-NOT: SZ | 237 ; DUMP-NOT: SZ |
OLD | NEW |