| OLD | NEW |
| 1 ; This is a smoke test of nop insertion. | 1 ; This is a smoke test of nop insertion. |
| 2 | 2 |
| 3 ; REQUIRES: allow_dump | 3 ; REQUIRES: allow_dump |
| 4 | 4 |
| 5 ; Use filetype=asm because this currently depends on the # variant | 5 ; Use filetype=asm because this currently depends on the /* variant */ |
| 6 ; assembler comment. | 6 ; assembler comment. |
| 7 | 7 |
| 8 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \ | 8 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \ |
| 9 ; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=1 \ | 9 ; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=1 \ |
| 10 ; RUN: | FileCheck %s --check-prefix=PROB50 | 10 ; RUN: | FileCheck %s --check-prefix=PROB50 |
| 11 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \ | 11 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \ |
| 12 ; RUN: -nop-insertion-percentage=90 -max-nops-per-instruction=1 \ | 12 ; RUN: -nop-insertion-percentage=90 -max-nops-per-instruction=1 \ |
| 13 ; RUN: | FileCheck %s --check-prefix=PROB90 | 13 ; RUN: | FileCheck %s --check-prefix=PROB90 |
| 14 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \ | 14 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \ |
| 15 ; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=2 \ | 15 ; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=2 \ |
| 16 ; RUN: | FileCheck %s --check-prefix=MAXNOPS2 | 16 ; RUN: | FileCheck %s --check-prefix=MAXNOPS2 |
| 17 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion -sandbox\ | 17 ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion -sandbox\ |
| 18 ; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=1 \ | 18 ; RUN: -nop-insertion-percentage=50 -max-nops-per-instruction=1 \ |
| 19 ; RUN: | FileCheck %s --check-prefix=SANDBOX50 | 19 ; RUN: | FileCheck %s --check-prefix=SANDBOX50 |
| 20 | 20 |
| 21 | 21 |
| 22 define internal <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) { | 22 define internal <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) { |
| 23 entry: | 23 entry: |
| 24 %res = mul <4 x i32> %a, %b | 24 %res = mul <4 x i32> %a, %b |
| 25 ret <4 x i32> %res | 25 ret <4 x i32> %res |
| 26 | 26 |
| 27 ; PROB50-LABEL: mul_v4i32 | 27 ; PROB50-LABEL: mul_v4i32 |
| 28 ; PROB50: nop # variant = 1 | 28 ; PROB50: nop /* variant = 1 */ |
| 29 ; PROB50: subl $60, %esp | 29 ; PROB50: subl $60, %esp |
| 30 ; PROB50: nop # variant = 3 | 30 ; PROB50: nop /* variant = 3 */ |
| 31 ; PROB50: movups %xmm0, 32(%esp) | 31 ; PROB50: movups %xmm0, 32(%esp) |
| 32 ; PROB50: movups %xmm1, 16(%esp) | 32 ; PROB50: movups %xmm1, 16(%esp) |
| 33 ; PROB50: movups 32(%esp), %xmm0 | 33 ; PROB50: movups 32(%esp), %xmm0 |
| 34 ; PROB50: nop # variant = 1 | 34 ; PROB50: nop /* variant = 1 */ |
| 35 ; PROB50: pshufd $49, 32(%esp), %xmm1 | 35 ; PROB50: pshufd $49, 32(%esp), %xmm1 |
| 36 ; PROB50: nop # variant = 4 | 36 ; PROB50: nop /* variant = 4 */ |
| 37 ; PROB50: pshufd $49, 16(%esp), %xmm2 | 37 ; PROB50: pshufd $49, 16(%esp), %xmm2 |
| 38 ; PROB50: nop # variant = 1 | 38 ; PROB50: nop /* variant = 1 */ |
| 39 ; PROB50: pmuludq 16(%esp), %xmm0 | 39 ; PROB50: pmuludq 16(%esp), %xmm0 |
| 40 ; PROB50: pmuludq %xmm2, %xmm1 | 40 ; PROB50: pmuludq %xmm2, %xmm1 |
| 41 ; PROB50: nop # variant = 0 | 41 ; PROB50: nop /* variant = 0 */ |
| 42 ; PROB50: shufps $136, %xmm1, %xmm0 | 42 ; PROB50: shufps $136, %xmm1, %xmm0 |
| 43 ; PROB50: nop # variant = 3 | 43 ; PROB50: nop /* variant = 3 */ |
| 44 ; PROB50: pshufd $216, %xmm0, %xmm0 | 44 ; PROB50: pshufd $216, %xmm0, %xmm0 |
| 45 ; PROB50: nop # variant = 1 | 45 ; PROB50: nop /* variant = 1 */ |
| 46 ; PROB50: movups %xmm0, (%esp) | 46 ; PROB50: movups %xmm0, (%esp) |
| 47 ; PROB50: movups (%esp), %xmm0 | 47 ; PROB50: movups (%esp), %xmm0 |
| 48 ; PROB50: addl $60, %esp | 48 ; PROB50: addl $60, %esp |
| 49 ; PROB50: ret | 49 ; PROB50: ret |
| 50 | 50 |
| 51 ; PROB90-LABEL: mul_v4i32 | 51 ; PROB90-LABEL: mul_v4i32 |
| 52 ; PROB90: nop # variant = 1 | 52 ; PROB90: nop /* variant = 1 */ |
| 53 ; PROB90: subl $60, %esp | 53 ; PROB90: subl $60, %esp |
| 54 ; PROB90: nop # variant = 3 | 54 ; PROB90: nop /* variant = 3 */ |
| 55 ; PROB90: movups %xmm0, 32(%esp) | 55 ; PROB90: movups %xmm0, 32(%esp) |
| 56 ; PROB90: nop # variant = 4 | 56 ; PROB90: nop /* variant = 4 */ |
| 57 ; PROB90: movups %xmm1, 16(%esp) | 57 ; PROB90: movups %xmm1, 16(%esp) |
| 58 ; PROB90: nop # variant = 1 | 58 ; PROB90: nop /* variant = 1 */ |
| 59 ; PROB90: movups 32(%esp), %xmm0 | 59 ; PROB90: movups 32(%esp), %xmm0 |
| 60 ; PROB90: nop # variant = 4 | 60 ; PROB90: nop /* variant = 4 */ |
| 61 ; PROB90: pshufd $49, 32(%esp), %xmm1 | 61 ; PROB90: pshufd $49, 32(%esp), %xmm1 |
| 62 ; PROB90: nop # variant = 1 | 62 ; PROB90: nop /* variant = 1 */ |
| 63 ; PROB90: pshufd $49, 16(%esp), %xmm2 | 63 ; PROB90: pshufd $49, 16(%esp), %xmm2 |
| 64 ; PROB90: nop # variant = 4 | 64 ; PROB90: nop /* variant = 4 */ |
| 65 ; PROB90: pmuludq 16(%esp), %xmm0 | 65 ; PROB90: pmuludq 16(%esp), %xmm0 |
| 66 ; PROB90: nop # variant = 2 | 66 ; PROB90: nop /* variant = 2 */ |
| 67 ; PROB90: pmuludq %xmm2, %xmm1 | 67 ; PROB90: pmuludq %xmm2, %xmm1 |
| 68 ; PROB90: shufps $136, %xmm1, %xmm0 | 68 ; PROB90: shufps $136, %xmm1, %xmm0 |
| 69 ; PROB90: nop # variant = 1 | 69 ; PROB90: nop /* variant = 1 */ |
| 70 ; PROB90: pshufd $216, %xmm0, %xmm0 | 70 ; PROB90: pshufd $216, %xmm0, %xmm0 |
| 71 ; PROB90: movups %xmm0, (%esp) | 71 ; PROB90: movups %xmm0, (%esp) |
| 72 ; PROB90: nop # variant = 1 | 72 ; PROB90: nop /* variant = 1 */ |
| 73 ; PROB90: movups (%esp), %xmm0 | 73 ; PROB90: movups (%esp), %xmm0 |
| 74 ; PROB90: nop # variant = 0 | 74 ; PROB90: nop /* variant = 0 */ |
| 75 ; PROB90: addl $60, %esp | 75 ; PROB90: addl $60, %esp |
| 76 ; PROB90: nop # variant = 0 | 76 ; PROB90: nop /* variant = 0 */ |
| 77 ; PROB90: ret | 77 ; PROB90: ret |
| 78 ; PROB90: nop # variant = 4 | 78 ; PROB90: nop /* variant = 4 */ |
| 79 | 79 |
| 80 ; MAXNOPS2-LABEL: mul_v4i32 | 80 ; MAXNOPS2-LABEL: mul_v4i32 |
| 81 ; MAXNOPS2: nop # variant = 1 | 81 ; MAXNOPS2: nop /* variant = 1 */ |
| 82 ; MAXNOPS2: nop # variant = 3 | 82 ; MAXNOPS2: nop /* variant = 3 */ |
| 83 ; MAXNOPS2: subl $60, %esp | 83 ; MAXNOPS2: subl $60, %esp |
| 84 ; MAXNOPS2: movups %xmm0, 32(%esp) | 84 ; MAXNOPS2: movups %xmm0, 32(%esp) |
| 85 ; MAXNOPS2: nop # variant = 1 | 85 ; MAXNOPS2: nop /* variant = 1 */ |
| 86 ; MAXNOPS2: nop # variant = 4 | 86 ; MAXNOPS2: nop /* variant = 4 */ |
| 87 ; MAXNOPS2: movups %xmm1, 16(%esp) | 87 ; MAXNOPS2: movups %xmm1, 16(%esp) |
| 88 ; MAXNOPS2: nop # variant = 1 | 88 ; MAXNOPS2: nop /* variant = 1 */ |
| 89 ; MAXNOPS2: movups 32(%esp), %xmm0 | 89 ; MAXNOPS2: movups 32(%esp), %xmm0 |
| 90 ; MAXNOPS2: nop # variant = 0 | 90 ; MAXNOPS2: nop /* variant = 0 */ |
| 91 ; MAXNOPS2: nop # variant = 3 | 91 ; MAXNOPS2: nop /* variant = 3 */ |
| 92 ; MAXNOPS2: pshufd $49, 32(%esp), %xmm1 | 92 ; MAXNOPS2: pshufd $49, 32(%esp), %xmm1 |
| 93 ; MAXNOPS2: nop # variant = 1 | 93 ; MAXNOPS2: nop /* variant = 1 */ |
| 94 ; MAXNOPS2: pshufd $49, 16(%esp), %xmm2 | 94 ; MAXNOPS2: pshufd $49, 16(%esp), %xmm2 |
| 95 ; MAXNOPS2: pmuludq 16(%esp), %xmm0 | 95 ; MAXNOPS2: pmuludq 16(%esp), %xmm0 |
| 96 ; MAXNOPS2: pmuludq %xmm2, %xmm1 | 96 ; MAXNOPS2: pmuludq %xmm2, %xmm1 |
| 97 ; MAXNOPS2: nop # variant = 0 | 97 ; MAXNOPS2: nop /* variant = 0 */ |
| 98 ; MAXNOPS2: shufps $136, %xmm1, %xmm0 | 98 ; MAXNOPS2: shufps $136, %xmm1, %xmm0 |
| 99 ; MAXNOPS2: nop # variant = 0 | 99 ; MAXNOPS2: nop /* variant = 0 */ |
| 100 ; MAXNOPS2: nop # variant = 0 | 100 ; MAXNOPS2: nop /* variant = 0 */ |
| 101 ; MAXNOPS2: pshufd $216, %xmm0, %xmm0 | 101 ; MAXNOPS2: pshufd $216, %xmm0, %xmm0 |
| 102 ; MAXNOPS2: nop # variant = 1 | 102 ; MAXNOPS2: nop /* variant = 1 */ |
| 103 ; MAXNOPS2: nop # variant = 3 | 103 ; MAXNOPS2: nop /* variant = 3 */ |
| 104 ; MAXNOPS2: movups %xmm0, (%esp) | 104 ; MAXNOPS2: movups %xmm0, (%esp) |
| 105 ; MAXNOPS2: nop # variant = 3 | 105 ; MAXNOPS2: nop /* variant = 3 */ |
| 106 ; MAXNOPS2: movups (%esp), %xmm0 | 106 ; MAXNOPS2: movups (%esp), %xmm0 |
| 107 ; MAXNOPS2: addl $60, %esp | 107 ; MAXNOPS2: addl $60, %esp |
| 108 ; MAXNOPS2: nop # variant = 3 | 108 ; MAXNOPS2: nop /* variant = 3 */ |
| 109 ; MAXNOPS2: ret | 109 ; MAXNOPS2: ret |
| 110 | 110 |
| 111 | 111 |
| 112 ; SANDBOX50-LABEL: mul_v4i32 | 112 ; SANDBOX50-LABEL: mul_v4i32 |
| 113 ; SANDBOX50: nop # variant = 1 | 113 ; SANDBOX50: nop /* variant = 1 */ |
| 114 ; SANDBOX50: subl $60, %esp | 114 ; SANDBOX50: subl $60, %esp |
| 115 ; SANDBOX50: nop # variant = 3 | 115 ; SANDBOX50: nop /* variant = 3 */ |
| 116 ; SANDBOX50: movups %xmm0, 32(%esp) | 116 ; SANDBOX50: movups %xmm0, 32(%esp) |
| 117 ; SANDBOX50: movups %xmm1, 16(%esp) | 117 ; SANDBOX50: movups %xmm1, 16(%esp) |
| 118 ; SANDBOX50: movups 32(%esp), %xmm0 | 118 ; SANDBOX50: movups 32(%esp), %xmm0 |
| 119 ; SANDBOX50: nop # variant = 1 | 119 ; SANDBOX50: nop /* variant = 1 */ |
| 120 ; SANDBOX50: pshufd $49, 32(%esp), %xmm1 | 120 ; SANDBOX50: pshufd $49, 32(%esp), %xmm1 |
| 121 ; SANDBOX50: nop # variant = 4 | 121 ; SANDBOX50: nop /* variant = 4 */ |
| 122 ; SANDBOX50: pshufd $49, 16(%esp), %xmm2 | 122 ; SANDBOX50: pshufd $49, 16(%esp), %xmm2 |
| 123 ; SANDBOX50: nop # variant = 1 | 123 ; SANDBOX50: nop /* variant = 1 */ |
| 124 ; SANDBOX50: pmuludq 16(%esp), %xmm0 | 124 ; SANDBOX50: pmuludq 16(%esp), %xmm0 |
| 125 ; SANDBOX50: pmuludq %xmm2, %xmm1 | 125 ; SANDBOX50: pmuludq %xmm2, %xmm1 |
| 126 ; SANDBOX50: nop # variant = 0 | 126 ; SANDBOX50: nop /* variant = 0 */ |
| 127 ; SANDBOX50: shufps $136, %xmm1, %xmm0 | 127 ; SANDBOX50: shufps $136, %xmm1, %xmm0 |
| 128 ; SANDBOX50: nop # variant = 3 | 128 ; SANDBOX50: nop /* variant = 3 */ |
| 129 ; SANDBOX50: pshufd $216, %xmm0, %xmm0 | 129 ; SANDBOX50: pshufd $216, %xmm0, %xmm0 |
| 130 ; SANDBOX50: nop # variant = 1 | 130 ; SANDBOX50: nop /* variant = 1 */ |
| 131 ; SANDBOX50: movups %xmm0, (%esp) | 131 ; SANDBOX50: movups %xmm0, (%esp) |
| 132 ; SANDBOX50: movups (%esp), %xmm0 | 132 ; SANDBOX50: movups (%esp), %xmm0 |
| 133 ; SANDBOX50: addl $60, %esp | 133 ; SANDBOX50: addl $60, %esp |
| 134 ; SANDBOX50: pop %ecx | 134 ; SANDBOX50: pop %ecx |
| 135 ; SANDBOX50: .bundle_lock | 135 ; SANDBOX50: .bundle_lock |
| 136 ; SANDBOX50: andl $-32, %ecx | 136 ; SANDBOX50: andl $-32, %ecx |
| 137 ; SANDBOX50: jmp *%ecx | 137 ; SANDBOX50: jmp *%ecx |
| 138 ; SANDBOX50: .bundle_unlock | 138 ; SANDBOX50: .bundle_unlock |
| 139 | 139 |
| 140 } | 140 } |
| OLD | NEW |