Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 ; This checks support for insertelement and extractelement. | 1 ; This checks support for insertelement and extractelement. |
| 2 | 2 |
| 3 ; RUN: %llvm2ice --verbose inst %s | FileCheck %s | 3 ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s |
| 4 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s | |
| 5 ; RUN: %llvm2ice -O2 -mattr=sse4.1 --verbose none %s \ | |
| 6 ; RUN: | FileCheck %s --check-prefix=SSE41 | |
| 7 ; RUN: %llvm2ice -Om1 -mattr=sse4.1 --verbose none %s \ | |
| 8 ; RUN: | FileCheck %s --check-prefix=SSE41 | |
| 4 ; RUN: %llvm2ice -O2 --verbose none %s \ | 9 ; RUN: %llvm2ice -O2 --verbose none %s \ |
| 5 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj | 10 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj |
| 6 ; RUN: %llvm2ice -Om1 --verbose none %s \ | 11 ; RUN: %llvm2ice -Om1 --verbose none %s \ |
| 7 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj | 12 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj |
| 13 ; RUN: %llvm2ice -O2 -mattr=sse4.1 --verbose none %s \ | |
| 14 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj | |
| 15 ; RUN: %llvm2ice -Om1 -mattr=sse4.1 --verbose none %s \ | |
| 16 ; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj | |
| 8 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s | 17 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s |
| 9 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s | 18 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s |
| 10 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \ | 19 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \ |
| 11 ; RUN: | FileCheck --check-prefix=DUMP %s | 20 ; RUN: | FileCheck --check-prefix=DUMP %s |
| 12 | 21 |
| 13 ; insertelement operations | 22 ; insertelement operations |
| 14 | 23 |
| 15 define <4 x float> @insertelement_v4f32_0(<4 x float> %vec, float %elt) { | 24 define <4 x float> @insertelement_v4f32_0(<4 x float> %vec, float %elt) { |
| 16 entry: | 25 entry: |
| 17 %res = insertelement <4 x float> %vec, float %elt, i32 0 | 26 %res = insertelement <4 x float> %vec, float %elt, i32 0 |
| 18 ret <4 x float> %res | 27 ret <4 x float> %res |
| 19 ; CHECK-LABEL: insertelement_v4f32_0: | 28 ; CHECK-LABEL: insertelement_v4f32_0: |
| 20 ; CHECK: movss | 29 ; CHECK: movss |
| 30 | |
| 31 ; SSE41-LABEL: insertelement_v4f32_0: | |
| 32 ; SSE41: insertps {{.*}}, {{.*}}, 0 | |
| 21 } | 33 } |
| 22 | 34 |
| 23 define <4 x i32> @insertelement_v4i32_0(<4 x i32> %vec, i32 %elt) { | 35 define <4 x i32> @insertelement_v4i32_0(<4 x i32> %vec, i32 %elt) { |
| 24 entry: | 36 entry: |
| 25 %res = insertelement <4 x i32> %vec, i32 %elt, i32 0 | 37 %res = insertelement <4 x i32> %vec, i32 %elt, i32 0 |
| 26 ret <4 x i32> %res | 38 ret <4 x i32> %res |
| 27 ; CHECK-LABEL: insertelement_v4i32_0: | 39 ; CHECK-LABEL: insertelement_v4i32_0: |
| 28 ; CHECK: movss | 40 ; CHECK: movss |
| 41 | |
| 42 ; SSE41-LABEL: insertelement_v4i32_0: | |
| 43 ; SSE41: pinsrd {{.*}}, {{.*}}, 0 | |
| 29 } | 44 } |
| 30 | 45 |
| 31 | 46 |
| 32 define <4 x float> @insertelement_v4f32_1(<4 x float> %vec, float %elt) { | 47 define <4 x float> @insertelement_v4f32_1(<4 x float> %vec, float %elt) { |
| 33 entry: | 48 entry: |
| 34 %res = insertelement <4 x float> %vec, float %elt, i32 1 | 49 %res = insertelement <4 x float> %vec, float %elt, i32 1 |
| 35 ret <4 x float> %res | 50 ret <4 x float> %res |
| 36 ; CHECK-LABEL: insertelement_v4f32_1: | 51 ; CHECK-LABEL: insertelement_v4f32_1: |
| 37 ; CHECK: shufps | 52 ; CHECK: shufps |
| 38 ; CHECK: shufps | 53 ; CHECK: shufps |
| 54 | |
| 55 ; SSE41-LABEL: insertelement_v4f32_1: | |
| 56 ; SSE41: insertps {{.*}}, {{.*}}, 16 | |
| 39 } | 57 } |
| 40 | 58 |
| 41 define <4 x i32> @insertelement_v4i32_1(<4 x i32> %vec, i32 %elt) { | 59 define <4 x i32> @insertelement_v4i32_1(<4 x i32> %vec, i32 %elt) { |
| 42 entry: | 60 entry: |
| 43 %res = insertelement <4 x i32> %vec, i32 %elt, i32 1 | 61 %res = insertelement <4 x i32> %vec, i32 %elt, i32 1 |
| 44 ret <4 x i32> %res | 62 ret <4 x i32> %res |
| 45 ; CHECK-LABEL: insertelement_v4i32_1: | 63 ; CHECK-LABEL: insertelement_v4i32_1: |
| 46 ; CHECK: shufps | 64 ; CHECK: shufps |
| 47 ; CHECK: shufps | 65 ; CHECK: shufps |
| 66 | |
| 67 ; SSE41-LABEL: insertelement_v4i32_1: | |
| 68 ; SSE41: pinsrd {{.*}}, {{.*}}, 1 | |
| 48 } | 69 } |
| 49 | 70 |
| 50 define <8 x i16> @insertelement_v8i16(<8 x i16> %vec, i32 %elt.arg) { | 71 define <8 x i16> @insertelement_v8i16(<8 x i16> %vec, i32 %elt.arg) { |
| 51 entry: | 72 entry: |
| 52 %elt = trunc i32 %elt.arg to i16 | 73 %elt = trunc i32 %elt.arg to i16 |
| 53 %res = insertelement <8 x i16> %vec, i16 %elt, i32 1 | 74 %res = insertelement <8 x i16> %vec, i16 %elt, i32 1 |
| 54 ret <8 x i16> %res | 75 ret <8 x i16> %res |
| 55 ; CHECK-LABEL: insertelement_v8i16 | 76 ; CHECK-LABEL: insertelement_v8i16: |
| 56 ; CHECK: pinsrw | 77 ; CHECK: pinsrw |
| 78 | |
| 79 ; SSE41-LABEL: insertelement_v8i16: | |
| 80 ; SSE41: pinsrw | |
| 57 } | 81 } |
| 58 | 82 |
| 59 define <16 x i8> @insertelement_v16i8(<16 x i8> %vec, i32 %elt.arg) { | 83 define <16 x i8> @insertelement_v16i8(<16 x i8> %vec, i32 %elt.arg) { |
| 60 entry: | 84 entry: |
| 61 %elt = trunc i32 %elt.arg to i8 | 85 %elt = trunc i32 %elt.arg to i8 |
| 62 %res = insertelement <16 x i8> %vec, i8 %elt, i32 1 | 86 %res = insertelement <16 x i8> %vec, i8 %elt, i32 1 |
| 63 ret <16 x i8> %res | 87 ret <16 x i8> %res |
| 64 ; CHECK-LABEL: insertelement_v16i8: | 88 ; CHECK-LABEL: insertelement_v16i8: |
| 65 ; CHECK: movups | 89 ; CHECK: movups |
| 66 ; CHECK: lea | 90 ; CHECK: lea |
| 67 ; CHECK: mov | 91 ; CHECK: mov |
| 92 | |
| 93 ; SSE41-LABEL: insertelement_v16i8: | |
| 94 ; SSE41: pinsrb | |
| 68 } | 95 } |
| 69 | 96 |
| 70 define <4 x i1> @insertelement_v4i1_0(<4 x i1> %vec, i32 %elt.arg) { | 97 define <4 x i1> @insertelement_v4i1_0(<4 x i1> %vec, i32 %elt.arg) { |
| 71 entry: | 98 entry: |
| 72 %elt = trunc i32 %elt.arg to i1 | 99 %elt = trunc i32 %elt.arg to i1 |
| 73 %res = insertelement <4 x i1> %vec, i1 %elt, i32 0 | 100 %res = insertelement <4 x i1> %vec, i1 %elt, i32 0 |
| 74 ret <4 x i1> %res | 101 ret <4 x i1> %res |
| 75 ; CHECK-LABEL: insertelement_v4i1_0: | 102 ; CHECK-LABEL: insertelement_v4i1_0: |
| 76 ; CHECK: movss | 103 ; CHECK: movss |
| 104 | |
| 105 ; SSE41-LABEL: insertelement_v4i1_0: | |
| 106 ; SSE41: pinsrd {{.*}}, {{.*}}, 0 | |
| 77 } | 107 } |
| 78 | 108 |
| 79 define <4 x i1> @insertelement_v4i1_1(<4 x i1> %vec, i32 %elt.arg) { | 109 define <4 x i1> @insertelement_v4i1_1(<4 x i1> %vec, i32 %elt.arg) { |
| 80 entry: | 110 entry: |
| 81 %elt = trunc i32 %elt.arg to i1 | 111 %elt = trunc i32 %elt.arg to i1 |
| 82 %res = insertelement <4 x i1> %vec, i1 %elt, i32 1 | 112 %res = insertelement <4 x i1> %vec, i1 %elt, i32 1 |
| 83 ret <4 x i1> %res | 113 ret <4 x i1> %res |
| 84 ; CHECK-LABEL: insertelement_v4i1_1: | 114 ; CHECK-LABEL: insertelement_v4i1_1: |
| 85 ; CHECK: shufps | 115 ; CHECK: shufps |
| 86 ; CHECK: shufps | 116 ; CHECK: shufps |
| 117 | |
| 118 ; SSE41-LABEL: insertelement_v4i1_1: | |
| 119 ; SSE41: pinsrd {{.*}}, {{.*}}, 1 | |
| 87 } | 120 } |
| 88 | 121 |
| 89 define <8 x i1> @insertelement_v8i1(<8 x i1> %vec, i32 %elt.arg) { | 122 define <8 x i1> @insertelement_v8i1(<8 x i1> %vec, i32 %elt.arg) { |
| 90 entry: | 123 entry: |
| 91 %elt = trunc i32 %elt.arg to i1 | 124 %elt = trunc i32 %elt.arg to i1 |
| 92 %res = insertelement <8 x i1> %vec, i1 %elt, i32 1 | 125 %res = insertelement <8 x i1> %vec, i1 %elt, i32 1 |
| 93 ret <8 x i1> %res | 126 ret <8 x i1> %res |
| 94 ; CHECK-LABEL: insertelement_v8i1: | 127 ; CHECK-LABEL: insertelement_v8i1: |
| 95 ; CHECK: pinsrw | 128 ; CHECK: pinsrw |
| 129 | |
| 130 ; SSE41-LABEL: insertelement_v8i1: | |
| 131 ; SSE41: pinsrw | |
| 96 } | 132 } |
| 97 | 133 |
| 98 define <16 x i1> @insertelement_v16i1(<16 x i1> %vec, i32 %elt.arg) { | 134 define <16 x i1> @insertelement_v16i1(<16 x i1> %vec, i32 %elt.arg) { |
| 99 entry: | 135 entry: |
| 100 %elt = trunc i32 %elt.arg to i1 | 136 %elt = trunc i32 %elt.arg to i1 |
| 101 %res = insertelement <16 x i1> %vec, i1 %elt, i32 1 | 137 %res = insertelement <16 x i1> %vec, i1 %elt, i32 1 |
| 102 ret <16 x i1> %res | 138 ret <16 x i1> %res |
| 103 ; CHECK-LABEL: insertelement_v16i1: | 139 ; CHECK-LABEL: insertelement_v16i1: |
| 104 ; CHECK: movups | 140 ; CHECK: movups |
| 105 ; CHECK: lea | 141 ; CHECK: lea |
| 106 ; CHECK: mov | 142 ; CHECK: mov |
| 143 | |
| 144 ; SSE41-LABEL: insertelement_v16i1: | |
| 145 ; SSE41: pinsrb | |
| 107 } | 146 } |
| 108 | 147 |
| 109 ; extractelement operations | 148 ; extractelement operations |
| 110 | 149 |
| 111 define float @extractelement_v4f32(<4 x float> %vec) { | 150 define float @extractelement_v4f32(<4 x float> %vec) { |
| 112 entry: | 151 entry: |
| 113 %res = extractelement <4 x float> %vec, i32 1 | 152 %res = extractelement <4 x float> %vec, i32 1 |
| 114 ret float %res | 153 ret float %res |
| 115 ; CHECK-LABEL: extractelement_v4f32: | 154 ; CHECK-LABEL: extractelement_v4f32: |
| 116 ; CHECK: pshufd | 155 ; CHECK: pshufd |
| 156 | |
| 157 ; SSE41-LABEL: extractelement_v4f32: | |
| 158 ; SSE41: pshufd | |
|
Review comment — jvoung (off chromium), 2014/07/30 04:30:11:
Is the plan to use extractps later?
Reply — wala, 2014/07/30 18:11:55:
No. I now think we should not be using extractps t… [reply truncated in the captured page]
| |
| 117 } | 159 } |
| 118 | 160 |
| 119 define i32 @extractelement_v4i32(<4 x i32> %vec) { | 161 define i32 @extractelement_v4i32(<4 x i32> %vec) { |
| 120 entry: | 162 entry: |
| 121 %res = extractelement <4 x i32> %vec, i32 1 | 163 %res = extractelement <4 x i32> %vec, i32 1 |
| 122 ret i32 %res | 164 ret i32 %res |
| 123 ; CHECK-LABEL: extractelement_v4i32: | 165 ; CHECK-LABEL: extractelement_v4i32: |
| 124 ; CHECK: pshufd | 166 ; CHECK: pshufd |
| 167 | |
| 168 ; SSE41-LABEL: extractelement_v4i32: | |
| 169 ; SSE41: pextrd | |
| 125 } | 170 } |
| 126 | 171 |
| 127 define i32 @extractelement_v8i16(<8 x i16> %vec) { | 172 define i32 @extractelement_v8i16(<8 x i16> %vec) { |
| 128 entry: | 173 entry: |
| 129 %res = extractelement <8 x i16> %vec, i32 1 | 174 %res = extractelement <8 x i16> %vec, i32 1 |
| 130 %res.ext = zext i16 %res to i32 | 175 %res.ext = zext i16 %res to i32 |
| 131 ret i32 %res.ext | 176 ret i32 %res.ext |
| 132 ; CHECK-LABEL: extractelement_v8i16: | 177 ; CHECK-LABEL: extractelement_v8i16: |
| 133 ; CHECK: pextrw | 178 ; CHECK: pextrw |
| 179 | |
| 180 ; SSE41-LABEL: extractelement_v8i16: | |
| 181 ; SSE41: pextrw | |
| 134 } | 182 } |
| 135 | 183 |
| 136 define i32 @extractelement_v16i8(<16 x i8> %vec) { | 184 define i32 @extractelement_v16i8(<16 x i8> %vec) { |
| 137 entry: | 185 entry: |
| 138 %res = extractelement <16 x i8> %vec, i32 1 | 186 %res = extractelement <16 x i8> %vec, i32 1 |
| 139 %res.ext = zext i8 %res to i32 | 187 %res.ext = zext i8 %res to i32 |
| 140 ret i32 %res.ext | 188 ret i32 %res.ext |
| 141 ; CHECK-LABEL: extractelement_v16i8: | 189 ; CHECK-LABEL: extractelement_v16i8: |
| 142 ; CHECK: movups | 190 ; CHECK: movups |
| 143 ; CHECK: lea | 191 ; CHECK: lea |
| 144 ; CHECK: mov | 192 ; CHECK: mov |
| 193 | |
| 194 ; SSE41-LABEL: extractelement_v16i8: | |
| 195 ; SSE41: pextrb | |
| 145 } | 196 } |
| 146 | 197 |
| 147 define i32 @extractelement_v4i1(<4 x i1> %vec) { | 198 define i32 @extractelement_v4i1(<4 x i1> %vec) { |
| 148 entry: | 199 entry: |
| 149 %res = extractelement <4 x i1> %vec, i32 1 | 200 %res = extractelement <4 x i1> %vec, i32 1 |
| 150 %res.ext = zext i1 %res to i32 | 201 %res.ext = zext i1 %res to i32 |
| 151 ret i32 %res.ext | 202 ret i32 %res.ext |
| 152 ; CHECK-LABEL: extractelement_v4i1: | 203 ; CHECK-LABEL: extractelement_v4i1: |
| 153 ; CHECK: pshufd | 204 ; CHECK: pshufd |
| 205 | |
| 206 ; SSE41-LABEL: extractelement_v4i1: | |
| 207 ; SSE41: pextrd | |
| 154 } | 208 } |
| 155 | 209 |
| 156 define i32 @extractelement_v8i1(<8 x i1> %vec) { | 210 define i32 @extractelement_v8i1(<8 x i1> %vec) { |
| 157 entry: | 211 entry: |
| 158 %res = extractelement <8 x i1> %vec, i32 1 | 212 %res = extractelement <8 x i1> %vec, i32 1 |
| 159 %res.ext = zext i1 %res to i32 | 213 %res.ext = zext i1 %res to i32 |
| 160 ret i32 %res.ext | 214 ret i32 %res.ext |
| 161 ; CHECK-LABEL: extractelement_v8i1: | 215 ; CHECK-LABEL: extractelement_v8i1: |
| 162 ; CHECK: pextrw | 216 ; CHECK: pextrw |
| 217 | |
| 218 ; SSE41-LABEL: extractelement_v8i1: | |
| 219 ; SSE41: pextrw | |
| 163 } | 220 } |
| 164 | 221 |
| 165 define i32 @extractelement_v16i1(<16 x i1> %vec) { | 222 define i32 @extractelement_v16i1(<16 x i1> %vec) { |
| 166 entry: | 223 entry: |
| 167 %res = extractelement <16 x i1> %vec, i32 1 | 224 %res = extractelement <16 x i1> %vec, i32 1 |
| 168 %res.ext = zext i1 %res to i32 | 225 %res.ext = zext i1 %res to i32 |
| 169 ret i32 %res.ext | 226 ret i32 %res.ext |
| 170 ; CHECK-LABEL: extractelement_v16i1: | 227 ; CHECK-LABEL: extractelement_v16i1: |
| 171 ; CHECK: movups | 228 ; CHECK: movups |
| 172 ; CHECK: lea | 229 ; CHECK: lea |
| 173 ; CHECK: mov | 230 ; CHECK: mov |
| 231 | |
| 232 ; SSE41-LABEL: extractelement_v16i1: | |
| 233 ; SSE41: pextrb | |
| 174 } | 234 } |
| 175 | 235 |
| 176 ; ERRORS-NOT: ICE translation error | 236 ; ERRORS-NOT: ICE translation error |
| 177 ; DUMP-NOT: SZ | 237 ; DUMP-NOT: SZ |
| OLD | NEW |