OLD | NEW |
(Empty) | |
| 1 ; RUN: opt -pnacl-abi-simplify-postopt %s -S | \ |
| 2 ; RUN: opt -backend-canonicalize -S | FileCheck %s |
| 3 |
| 4 ; Test that the SIMD game of life example from the NaCl SDK has an inner loop |
| 5 ; that contains the expected shufflevector instructions. First run the ABI |
| 6 ; simplifications on the code, then run the translator's peepholes. |
| 7 ; |
| 8 ; The stable PNaCl bitcode ABI has neither shufflevector nor constant vectors; |
| 9 ; it instead has insertelement, extractelement and loads from globals. Note that |
| 10 ; `undef` becomes `0` in the constants. |
| 11 |
| 12 ; The datalayout is needed to determine the alignment of the globals. |
| 13 target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64
:64:64-p:32:32:32-v128:32:32" |
| 14 |
|    ; Function under test. Each (%srcN0, %srcN1) pair is shuffled twice below, |
|    ; selecting lanes 1..16 and lanes 2..17 of the 32-lane concatenation of the |
|    ; pair, i.e. the pair shifted by one and by two lanes. |
|    ; NOTE(review): presumably the three pairs are three adjacent pixel rows of |
|    ; the game-of-life grid -- confirm against the SDK example. |
| 15 define <16 x i8> @InnerLoop(<16 x i8>* %pixel_line, <16 x i8> %src00, <16 x i8>
%src01, <16 x i8> %src10, <16 x i8> %src11, <16 x i8> %src20, <16 x i8> %src21)
{ |
|    ; The six input shuffles are expected on consecutive lines directly after the |
|    ; function header, still expressed as shufflevector instructions. |
| 16 ; CHECK-LABEL: InnerLoop |
| 17 ; CHECK-NEXT: shufflevector <16 x i8> %src00, <16 x i8> %src01, <16 x i32> <i3
2 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32
12, i32 13, i32 14, i32 15, i32 16> |
| 18 ; CHECK-NEXT: shufflevector <16 x i8> %src00, <16 x i8> %src01, <16 x i32> <i3
2 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i3
2 13, i32 14, i32 15, i32 16, i32 17> |
| 19 ; CHECK-NEXT: shufflevector <16 x i8> %src10, <16 x i8> %src11, <16 x i32> <i3
2 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32
12, i32 13, i32 14, i32 15, i32 16> |
| 20 ; CHECK-NEXT: shufflevector <16 x i8> %src10, <16 x i8> %src11, <16 x i32> <i3
2 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i3
2 13, i32 14, i32 15, i32 16, i32 17> |
| 21 ; CHECK-NEXT: shufflevector <16 x i8> %src20, <16 x i8> %src21, <16 x i32> <i3
2 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32
12, i32 13, i32 14, i32 15, i32 16> |
| 22 ; CHECK-NEXT: shufflevector <16 x i8> %src20, <16 x i8> %src21, <16 x i32> <i3
2 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i3
2 13, i32 14, i32 15, i32 16, i32 17> |
|    ; The instructions the stable ABI uses in place of shufflevector and constant |
|    ; vectors must not appear between the shuffles above and the select matched |
|    ; further down. |
| 23 ; CHECK-NOT: load |
| 24 ; CHECK-NOT: insertelement |
| 25 ; CHECK-NOT: extractelement |
|    ; Input shuffles: one-lane shift (%shuffle, %shuffle4, %shuffle6) and two-lane |
|    ; shift (%shuffle3, %shuffle5, %shuffle7) of each source pair. |
| 26 %shuffle = shufflevector <16 x i8> %src00, <16 x i8> %src01, <16 x i32> <i32 1
, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12
, i32 13, i32 14, i32 15, i32 16> |
| 27 %shuffle3 = shufflevector <16 x i8> %src00, <16 x i8> %src01, <16 x i32> <i32
2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32
13, i32 14, i32 15, i32 16, i32 17> |
| 28 %shuffle4 = shufflevector <16 x i8> %src10, <16 x i8> %src11, <16 x i32> <i32
1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 1
2, i32 13, i32 14, i32 15, i32 16> |
| 29 %shuffle5 = shufflevector <16 x i8> %src10, <16 x i8> %src11, <16 x i32> <i32
2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32
13, i32 14, i32 15, i32 16, i32 17> |
| 30 %shuffle6 = shufflevector <16 x i8> %src20, <16 x i8> %src21, <16 x i32> <i32
1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 1
2, i32 13, i32 14, i32 15, i32 16> |
| 31 %shuffle7 = shufflevector <16 x i8> %src20, <16 x i8> %src21, <16 x i32> <i32
2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32
13, i32 14, i32 15, i32 16, i32 17> |
|    ; Lane-wise sum of eight vectors: %src00, %src10, %src20 and every shuffle |
|    ; except %shuffle4, which is added separately after the shift below. |
|    ; NOTE(review): presumably the eight neighbours vs. the centre cell -- confirm. |
| 32 %add = add <16 x i8> %shuffle, %src00 |
| 33 %add8 = add <16 x i8> %add, %shuffle3 |
| 34 %add9 = add <16 x i8> %add8, %src10 |
| 35 %add10 = add <16 x i8> %add9, %shuffle5 |
| 36 %add11 = add <16 x i8> %add10, %src20 |
| 37 %add12 = add <16 x i8> %add11, %shuffle6 |
| 38 %add13 = add <16 x i8> %add12, %shuffle7 |
|    ; Despite its name, %add14 is a left shift by one (doubles the sum); %add15 |
|    ; then adds %shuffle4 on top. |
| 39 %add14 = shl <16 x i8> %add13, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1
, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> |
| 40 %add15 = add <16 x i8> %add14, %shuffle4 |
|    ; %and ends up with all bits set in lanes where 4 < %add15 < 8 (unsigned |
|    ; compares) and zero in every other lane. |
| 41 %cmp = icmp ugt <16 x i8> %add15, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i
8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> |
| 42 %sext = sext <16 x i1> %cmp to <16 x i8> |
| 43 %cmp16 = icmp ult <16 x i8> %add15, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8,
i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> |
| 44 ; CHECK: select |
| 45 %and = select <16 x i1> %cmp16, <16 x i8> %sext, <16 x i8> zeroinitializer |
|    ; Expected output shuffles, on consecutive lines right after the select. The |
|    ; masks equal those in the input; the undef lanes of the constant operand are |
|    ; expected to be printed as i8 0. |
| 46 ; CHECK-NEXT: shufflevector <16 x i8> %and, <16 x i8> <i8 0, i8 -1, i8 0, i8 0
, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x
i32> <i32 16, i32 0, i32 16, i32 17, i32 16, i32 1, i32 16, i32 17, i32 16, i32
2, i32 16, i32 17, i32 16, i32 3, i32 16, i32 17> |
| 47 ; CHECK-NEXT: shufflevector <16 x i8> %and, <16 x i8> <i8 0, i8 -1, i8 0, i8 0
, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x
i32> <i32 16, i32 4, i32 16, i32 17, i32 16, i32 5, i32 16, i32 17, i32 16, i32
6, i32 16, i32 17, i32 16, i32 7, i32 16, i32 17> |
| 48 ; CHECK-NEXT: shufflevector <16 x i8> %and, <16 x i8> <i8 0, i8 -1, i8 0, i8 0
, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x
i32> <i32 16, i32 8, i32 16, i32 17, i32 16, i32 9, i32 16, i32 17, i32 16, i32
10, i32 16, i32 17, i32 16, i32 11, i32 16, i32 17> |
| 49 ; CHECK-NEXT: shufflevector <16 x i8> %and, <16 x i8> <i8 0, i8 -1, i8 0, i8 0
, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x
i32> <i32 16, i32 12, i32 16, i32 17, i32 16, i32 13, i32 16, i32 17, i32 16, i
32 14, i32 16, i32 17, i32 16, i32 15, i32 16, i32 17> |
|    ; None of these instructions may appear after the last output shuffle either. |
| 50 ; CHECK-NOT: load |
| 51 ; CHECK-NOT: insertelement |
| 52 ; CHECK-NOT: extractelement |
|    ; Each output shuffle expands four lanes of %and to four bytes apiece: the |
|    ; mask pattern <16, k, 16, 17> yields (0, %and[k], 0, -1), because index 16 |
|    ; selects constant lane 0 (i8 0) and index 17 selects constant lane 1 (i8 -1). |
|    ; NOTE(review): presumably one 4-byte pixel per cell -- confirm. |
| 53 %shuffle18 = shufflevector <16 x i8> %and, <16 x i8> <i8 0, i8 -1, i8 undef, i
8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i
8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 0, i32
16, i32 17, i32 16, i32 1, i32 16, i32 17, i32 16, i32 2, i32 16, i32 17, i32 1
6, i32 3, i32 16, i32 17> |
| 54 %shuffle19 = shufflevector <16 x i8> %and, <16 x i8> <i8 0, i8 -1, i8 undef, i
8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i
8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 4, i32
16, i32 17, i32 16, i32 5, i32 16, i32 17, i32 16, i32 6, i32 16, i32 17, i32 1
6, i32 7, i32 16, i32 17> |
| 55 %shuffle20 = shufflevector <16 x i8> %and, <16 x i8> <i8 0, i8 -1, i8 undef, i
8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i
8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 8, i32
16, i32 17, i32 16, i32 9, i32 16, i32 17, i32 16, i32 10, i32 16, i32 17, i32
16, i32 11, i32 16, i32 17> |
| 56 %shuffle21 = shufflevector <16 x i8> %and, <16 x i8> <i8 0, i8 -1, i8 undef, i
8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i
8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 12, i3
2 16, i32 17, i32 16, i32 13, i32 16, i32 17, i32 16, i32 14, i32 16, i32 17, i3
2 16, i32 15, i32 16, i32 17> |
|    ; Store the four expanded vectors into four consecutive <16 x i8> slots |
|    ; starting at %pixel_line. |
| 57 store <16 x i8> %shuffle18, <16 x i8>* %pixel_line, align 16 |
| 58 %add.ptr22 = getelementptr inbounds <16 x i8>* %pixel_line, i32 1 |
| 59 store <16 x i8> %shuffle19, <16 x i8>* %add.ptr22, align 16 |
| 60 %add.ptr23 = getelementptr inbounds <16 x i8>* %pixel_line, i32 2 |
| 61 store <16 x i8> %shuffle20, <16 x i8>* %add.ptr23, align 16 |
| 62 %add.ptr24 = getelementptr inbounds <16 x i8>* %pixel_line, i32 3 |
| 63 store <16 x i8> %shuffle21, <16 x i8>* %add.ptr24, align 16 |
|    ; Return %and masked down to its low bit, i.e. 0 or 1 per lane. |
| 64 %and25 = and <16 x i8> %and, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> |
| 65 ret <16 x i8> %and25 |
| 66 } |
OLD | NEW |