OLD | NEW |
1 ; This is a smoke test of nop insertion. | 1 ; This is a smoke test of nop insertion. |
2 | 2 |
3 ; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \ | 3 ; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \ |
4 ; RUN: -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB50 | 4 ; RUN: -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB50 |
5 ; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=90 \ | 5 ; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=90 \ |
6 ; RUN: -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB90 | 6 ; RUN: -max-nops-per-instruction=1 %s | FileCheck %s --check-prefix=PROB90 |
7 ; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \ | 7 ; RUN: %llvm2ice -rng-seed=1 -nop-insertion -nop-insertion-percentage=50 \ |
8 ; RUN: -max-nops-per-instruction=2 %s | FileCheck %s --check-prefix=MAXNOPS2 | 8 ; RUN: -max-nops-per-instruction=2 %s | FileCheck %s --check-prefix=MAXNOPS2 |
9 | 9 |
10 define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) { | 10 define <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) { |
11 entry: | 11 entry: |
12 %res = mul <4 x i32> %a, %b | 12 %res = mul <4 x i32> %a, %b |
13 ret <4 x i32> %res | 13 ret <4 x i32> %res |
14 ; PROB50-LABEL: mul_v4i32: | 14 ; PROB50-LABEL: mul_v4i32: |
15 ; PROB50: nop # variant = 3 | 15 ; PROB50: nop # variant = 3 |
16 ; PROB50: sub esp, 60 | 16 ; PROB50: sub esp, 60 |
17 ; PROB50: nop # variant = 4 | 17 ; PROB50: nop # variant = 4 |
18 ; PROB50: movups xmmword ptr [esp+32], xmm0 | 18 ; PROB50: movups xmmword ptr [esp+32], xmm0 |
19 ; PROB50: movups xmmword ptr [esp+16], xmm1 | 19 ; PROB50: movups xmmword ptr [esp+16], xmm1 |
20 ; PROB50: nop # variant = 0 | 20 ; PROB50: nop # variant = 0 |
21 ; PROB50: movups xmm0, xmmword ptr [esp+32] | 21 ; PROB50: movups xmm0, xmmword ptr [esp+32] |
22 ; PROB50: nop # variant = 4 | 22 ; PROB50: nop # variant = 4 |
23 ; PROB50: pshufd xmm1, xmmword ptr [esp+32], 49 | 23 ; PROB50: pshufd xmm1, xmmword ptr [esp+32], 49 |
24 ; PROB50: pshufd xmm2, xmmword ptr [esp+16], 49 | 24 ; PROB50: pshufd xmm2, xmmword ptr [esp+16], 49 |
25 ; PROB50: pmuludq xmm0, xmmword ptr [esp+16] | 25 ; PROB50: pmuludq xmm0, xmmword ptr [esp+16] |
26 ; PROB50: pmuludq xmm1, xmm2 | 26 ; PROB50: pmuludq xmm1, xmm2 |
27 ; PROB50: nop # variant = 0 | 27 ; PROB50: nop # variant = 0 |
28 ; PROB50: shufps xmm0, xmm1, 136 | 28 ; PROB50: shufps xmm0, xmm1, 136 |
29 ; PROB50: pshufd xmm3, xmm0, 216 | 29 ; PROB50: pshufd xmm1, xmm0, 216 |
30 ; PROB50: nop # variant = 2 | 30 ; PROB50: nop # variant = 2 |
31 ; PROB50: movups xmmword ptr [esp], xmm3 | 31 ; PROB50: movups xmmword ptr [esp], xmm1 |
32 ; PROB50: movups xmm0, xmmword ptr [esp] | 32 ; PROB50: movups xmm0, xmmword ptr [esp] |
33 ; PROB50: add esp, 60 | 33 ; PROB50: add esp, 60 |
34 ; PROB50: nop # variant = 0 | 34 ; PROB50: nop # variant = 0 |
35 ; PROB50: ret | 35 ; PROB50: ret |
36 | 36 |
37 ; PROB90-LABEL: mul_v4i32: | 37 ; PROB90-LABEL: mul_v4i32: |
38 ; PROB90: nop # variant = 3 | 38 ; PROB90: nop # variant = 3 |
39 ; PROB90: sub esp, 60 | 39 ; PROB90: sub esp, 60 |
40 ; PROB90: nop # variant = 4 | 40 ; PROB90: nop # variant = 4 |
41 ; PROB90: movups xmmword ptr [esp+32], xmm0 | 41 ; PROB90: movups xmmword ptr [esp+32], xmm0 |
42 ; PROB90: nop # variant = 3 | 42 ; PROB90: nop # variant = 3 |
43 ; PROB90: movups xmmword ptr [esp+16], xmm1 | 43 ; PROB90: movups xmmword ptr [esp+16], xmm1 |
44 ; PROB90: nop # variant = 2 | 44 ; PROB90: nop # variant = 2 |
45 ; PROB90: movups xmm0, xmmword ptr [esp+32] | 45 ; PROB90: movups xmm0, xmmword ptr [esp+32] |
46 ; PROB90: nop # variant = 3 | 46 ; PROB90: nop # variant = 3 |
47 ; PROB90: pshufd xmm1, xmmword ptr [esp+32], 49 | 47 ; PROB90: pshufd xmm1, xmmword ptr [esp+32], 49 |
48 ; PROB90: nop # variant = 4 | 48 ; PROB90: nop # variant = 4 |
49 ; PROB90: pshufd xmm2, xmmword ptr [esp+16], 49 | 49 ; PROB90: pshufd xmm2, xmmword ptr [esp+16], 49 |
50 ; PROB90: nop # variant = 0 | 50 ; PROB90: nop # variant = 0 |
51 ; PROB90: pmuludq xmm0, xmmword ptr [esp+16] | 51 ; PROB90: pmuludq xmm0, xmmword ptr [esp+16] |
52 ; PROB90: nop # variant = 2 | 52 ; PROB90: nop # variant = 2 |
53 ; PROB90: pmuludq xmm1, xmm2 | 53 ; PROB90: pmuludq xmm1, xmm2 |
54 ; PROB90: nop # variant = 3 | 54 ; PROB90: nop # variant = 3 |
55 ; PROB90: shufps xmm0, xmm1, 136 | 55 ; PROB90: shufps xmm0, xmm1, 136 |
56 ; PROB90: nop # variant = 4 | 56 ; PROB90: nop # variant = 4 |
57 ; PROB90: pshufd xmm3, xmm0, 216 | 57 ; PROB90: pshufd xmm1, xmm0, 216 |
58 ; PROB90: nop # variant = 2 | 58 ; PROB90: nop # variant = 2 |
59 ; PROB90: movups xmmword ptr [esp], xmm3 | 59 ; PROB90: movups xmmword ptr [esp], xmm1 |
60 ; PROB90: nop # variant = 4 | 60 ; PROB90: nop # variant = 4 |
61 ; PROB90: movups xmm0, xmmword ptr [esp] | 61 ; PROB90: movups xmm0, xmmword ptr [esp] |
62 ; PROB90: nop # variant = 2 | 62 ; PROB90: nop # variant = 2 |
63 ; PROB90: add esp, 60 | 63 ; PROB90: add esp, 60 |
64 ; PROB90: nop # variant = 3 | 64 ; PROB90: nop # variant = 3 |
65 ; PROB90: ret | 65 ; PROB90: ret |
66 | 66 |
67 ; MAXNOPS2-LABEL: mul_v4i32: | 67 ; MAXNOPS2-LABEL: mul_v4i32: |
68 ; MAXNOPS2: sub esp, 60 | 68 ; MAXNOPS2: sub esp, 60 |
69 ; MAXNOPS2: nop # variant = 4 | 69 ; MAXNOPS2: nop # variant = 4 |
70 ; MAXNOPS2: movups xmmword ptr [esp+32], xmm0 | 70 ; MAXNOPS2: movups xmmword ptr [esp+32], xmm0 |
71 ; MAXNOPS2: nop # variant = 0 | 71 ; MAXNOPS2: nop # variant = 0 |
72 ; MAXNOPS2: nop # variant = 4 | 72 ; MAXNOPS2: nop # variant = 4 |
73 ; MAXNOPS2: movups xmmword ptr [esp+16], xmm1 | 73 ; MAXNOPS2: movups xmmword ptr [esp+16], xmm1 |
74 ; MAXNOPS2: movups xmm0, xmmword ptr [esp+32] | 74 ; MAXNOPS2: movups xmm0, xmmword ptr [esp+32] |
75 ; MAXNOPS2: nop # variant = 0 | 75 ; MAXNOPS2: nop # variant = 0 |
76 ; MAXNOPS2: pshufd xmm1, xmmword ptr [esp+32], 49 | 76 ; MAXNOPS2: pshufd xmm1, xmmword ptr [esp+32], 49 |
77 ; MAXNOPS2: nop # variant = 2 | 77 ; MAXNOPS2: nop # variant = 2 |
78 ; MAXNOPS2: pshufd xmm2, xmmword ptr [esp+16], 49 | 78 ; MAXNOPS2: pshufd xmm2, xmmword ptr [esp+16], 49 |
79 ; MAXNOPS2: pmuludq xmm0, xmmword ptr [esp+16] | 79 ; MAXNOPS2: pmuludq xmm0, xmmword ptr [esp+16] |
80 ; MAXNOPS2: nop # variant = 0 | 80 ; MAXNOPS2: nop # variant = 0 |
81 ; MAXNOPS2: nop # variant = 3 | 81 ; MAXNOPS2: nop # variant = 3 |
82 ; MAXNOPS2: pmuludq xmm1, xmm2 | 82 ; MAXNOPS2: pmuludq xmm1, xmm2 |
83 ; MAXNOPS2: shufps xmm0, xmm1, 136 | 83 ; MAXNOPS2: shufps xmm0, xmm1, 136 |
84 ; MAXNOPS2: pshufd xmm3, xmm0, 216 | 84 ; MAXNOPS2: pshufd xmm1, xmm0, 216 |
85 ; MAXNOPS2: nop # variant = 3 | 85 ; MAXNOPS2: nop # variant = 3 |
86 ; MAXNOPS2: movups xmmword ptr [esp], xmm3 | 86 ; MAXNOPS2: movups xmmword ptr [esp], xmm1 |
87 ; MAXNOPS2: nop # variant = 0 | 87 ; MAXNOPS2: nop # variant = 0 |
88 ; MAXNOPS2: movups xmm0, xmmword ptr [esp] | 88 ; MAXNOPS2: movups xmm0, xmmword ptr [esp] |
89 ; MAXNOPS2: nop # variant = 2 | 89 ; MAXNOPS2: nop # variant = 2 |
90 ; MAXNOPS2: add esp, 60 | 90 ; MAXNOPS2: add esp, 60 |
91 ; MAXNOPS2: nop # variant = 4 | 91 ; MAXNOPS2: nop # variant = 4 |
92 ; MAXNOPS2: ret | 92 ; MAXNOPS2: ret |
93 } | 93 } |
OLD | NEW |