OLD | NEW |
---|---|
(Empty) | |
1 ; Some shufflevector optimized lowering. This list is by no means exhaustive. | |
Jim Stichnoth
2016/04/25 21:23:45
reflow to 80-col
John
2016/04/25 22:38:39
Done.
| |
2 ; It is only a **basic** smoke test. The vector_ops crosstest has a broader | |
3 ; range of test cases. | |
4 | |
5 ; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble -a -O2 \ | |
6 ; RUN: --allow-externally-defined-symbols | FileCheck %s --check-prefix=X86 | |
7 | |
8 declare void @useV4I32(<4 x i32> %t); | |
9 ; Builds three <4 x i32> shuffles of %a and %b and checks the x86-32 lowering. | |
10 define internal void @shuffleV4I32(<4 x i32> %a, <4 x i32> %b) { | |
11 ; X86-LABEL: shuffleV4I32 | |
12 %a_0 = extractelement <4 x i32> %a, i32 0 | |
13 %a_1 = extractelement <4 x i32> %a, i32 1 | |
14 %a_2 = extractelement <4 x i32> %a, i32 2 | |
15 %a_3 = extractelement <4 x i32> %a, i32 3 | |
16 | |
17 %b_0 = extractelement <4 x i32> %b, i32 0 | |
18 %b_1 = extractelement <4 x i32> %b, i32 1 | |
19 %b_2 = extractelement <4 x i32> %b, i32 2 | |
20 %b_3 = extractelement <4 x i32> %b, i32 3 | |
21 ; t0 = <a0, b0, a1, b1>; the check below expects a single punpckldq. | |
22 %t0_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0 | |
23 %t0_1 = insertelement <4 x i32> %t0_0, i32 %b_0, i32 1 | |
24 %t0_2 = insertelement <4 x i32> %t0_1, i32 %a_1, i32 2 | |
25 %t0 = insertelement <4 x i32> %t0_2, i32 %b_1, i32 3 | |
26 ; X86: punpckldq {{.*}} | |
27 | |
28 call void @useV4I32(<4 x i32> %t0) | |
29 ; X86: call | |
30 ; t1 = <a0, b1, b1, a0>; expects shufps, then pshufd on the same register. | |
31 %t1_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0 | |
32 %t1_1 = insertelement <4 x i32> %t1_0, i32 %b_1, i32 1 | |
33 %t1_2 = insertelement <4 x i32> %t1_1, i32 %b_1, i32 2 | |
34 %t1 = insertelement <4 x i32> %t1_2, i32 %a_0, i32 3 | |
35 ; X86: shufps [[T:xmm[0-9]+]],{{.*}},0x10 | |
36 ; X86: pshufd {{.*}},[[T]],0x28 | |
37 | |
38 call void @useV4I32(<4 x i32> %t1) | |
39 ; X86: call | |
40 ; t2 = <a0, b3, a2, b2>; expects a sequence of three shufps. | |
41 %t2_0 = insertelement <4 x i32> undef, i32 %a_0, i32 0 | |
42 %t2_1 = insertelement <4 x i32> %t2_0, i32 %b_3, i32 1 | |
43 %t2_2 = insertelement <4 x i32> %t2_1, i32 %a_2, i32 2 | |
44 %t2 = insertelement <4 x i32> %t2_2, i32 %b_2, i32 3 | |
45 ; X86: shufps {{.*}},0x30 | |
46 ; X86: shufps {{.*}},0x22 | |
47 ; X86: shufps {{.*}},0x88 | |
48 | |
49 call void @useV4I32(<4 x i32> %t2) | |
50 ; X86: call | |
51 | |
52 ret void | |
53 } | |
OLD | NEW |