OLD | NEW |
(Empty) | |
| 1 ; Tests some optimized shufflevector lowerings. This list is by no means |
| 2 ; exhaustive; it is only a **basic** smoke test. The vector_ops crosstest has a |
| 3 ; broader range of test cases. |
| 4 |
| 5 ; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble -a -O2 \ |
| 6 ; RUN: --allow-externally-defined-symbols | FileCheck %s --check-prefix=X86 |
| 7 |
; Declaration only: @useV4I32 has no body in this file. It is called once with
; each rebuilt vector (%t0, %t1, %t2) in @shuffleV4I32 below.
; Note that the trailing ';' on the declaration starts an (empty) IR comment.
| 8 declare void @useV4I32(<4 x i32> %t); |
| 9 |
| 10 define internal void @shuffleV4I32(<4 x i32> %a, <4 x i32> %b) { |
| 11 ; X86-LABEL: shuffleV4I32 |
; Smoke test for shuffle pattern lowering. The body extracts the scalar lanes
; of %a and %b, rebuilds three vectors (%t0, %t1, %t2) with insertelement
; chains, and passes each one to @useV4I32. The check line(s) after each chain
; pin the instruction(s) expected in the disassembly, and the "call" check
; after each use keeps the following group of matches ordered after that call.
;
; Lanes of %a. Note: %a_3 is extracted but not used by any vector below.
| 12 %a_0 = extractelement <4 x i32> %a, i32 0 |
| 13 %a_1 = extractelement <4 x i32> %a, i32 1 |
| 14 %a_2 = extractelement <4 x i32> %a, i32 2 |
| 15 %a_3 = extractelement <4 x i32> %a, i32 3 |
| 16 |
; Lanes of %b.
| 17 %b_0 = extractelement <4 x i32> %b, i32 0 |
| 18 %b_1 = extractelement <4 x i32> %b, i32 1 |
| 19 %b_2 = extractelement <4 x i32> %b, i32 2 |
| 20 %b_3 = extractelement <4 x i32> %b, i32 3 |
| 21 |
; %t0 = <a0, b0, a1, b1>: lanes 0 and 1 of %a and %b, interleaved.
; Expected to show up as a punpckldq.
| 22 %t0_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0 |
| 23 %t0_1 = insertelement <4 x i32> %t0_0, i32 %b_0, i32 1 |
| 24 %t0_2 = insertelement <4 x i32> %t0_1, i32 %a_1, i32 2 |
| 25 %t0 = insertelement <4 x i32> %t0_2, i32 %b_1, i32 3 |
| 26 ; X86: punpckldq {{.*}} |
| 27 |
| 28 call void @useV4I32(<4 x i32> %t0) |
| 29 ; X86: call |
| 30 |
; %t1 = <a0, b1, b1, a0>.
; Expected as a shufps with immediate 0x10 whose first operand is captured as
; T, followed by a pshufd with immediate 0x28 that reads T as its source.
; NOTE(review): under the usual imm8 lane-selector encoding, 0x10 would build
; <a0, a0, b1, b0> and 0x28 would then pick lanes 0, 2, 2, 0 of it -- the
; regexes do not pin which register holds %a or %b; confirm against the
; lowering code.
| 31 %t1_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0 |
| 32 %t1_1 = insertelement <4 x i32> %t1_0, i32 %b_1, i32 1 |
| 33 %t1_2 = insertelement <4 x i32> %t1_1, i32 %b_1, i32 2 |
| 34 %t1 = insertelement <4 x i32> %t1_2, i32 %a_0, i32 3 |
| 35 ; X86: shufps [[T:xmm[0-9]+]],{{.*}},0x10 |
| 36 ; X86: pshufd {{.*}},[[T]],0x28 |
| 37 |
| 38 call void @useV4I32(<4 x i32> %t1) |
| 39 ; X86: call |
| 40 |
; %t2 = <a0, b3, a2, b2>.
; Expected as three shufps instructions with immediates 0x30, 0x22 and 0x88,
; matched in that order.
; NOTE(review): under the usual imm8 lane-selector encoding, 0x30 and 0x22
; would build <a0, a0, b3, b0> and <a2, a0, b2, b0>, and 0x88 would combine
; lanes 0 and 2 of each into the final vector -- operands are not pinned by
; the regexes; confirm against the lowering code.
| 41 %t2_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0 |
| 42 %t2_1 = insertelement <4 x i32> %t2_0, i32 %b_3, i32 1 |
| 43 %t2_2 = insertelement <4 x i32> %t2_1, i32 %a_2, i32 2 |
| 44 %t2 = insertelement <4 x i32> %t2_2, i32 %b_2, i32 3 |
| 45 ; X86: shufps {{.*}},0x30 |
| 46 ; X86: shufps {{.*}},0x22 |
| 47 ; X86: shufps {{.*}},0x88 |
| 48 |
| 49 call void @useV4I32(<4 x i32> %t2) |
| 50 ; X86: call |
| 51 |
| 52 ret void |
| 53 } |
OLD | NEW |